-
Notifications
You must be signed in to change notification settings - Fork 270
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
make host networking optional (#270)
* make host networking optional (attribution: Leo Palmer Sunmo @leosunmo) * update helm readme and add hostnetworking=false test * generate queue-processor assets * updated test output
- Loading branch information
Showing
7 changed files
with
224 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#!/bin/bash | ||
set -euo pipefail | ||
|
||
# Available env vars:
# $TMP_DIR
# $CLUSTER_NAME
# $KUBECONFIG
# $NODE_TERMINATION_HANDLER_DOCKER_REPO
# $NODE_TERMINATION_HANDLER_DOCKER_TAG
# $WEBHOOK_DOCKER_REPO
# $WEBHOOK_DOCKER_TAG
# $AEMM_URL
# $AEMM_VERSION
# $AEMM_DL_URL
# $IMDS_PORT
|
||
# Prints the failure banner for this test and terminates the script.
# Globals:   CLUSTER_NAME (read)
# Arguments: $1 - exit status to terminate with (optional, defaults to 1)
# Outputs:   failure banner on stdout
function fail_and_exit {
  echo "❌ Spot Interruption w/o Host Networking test failed $CLUSTER_NAME ❌"
  # Quoted so the status always reaches `exit` as exactly one word.
  exit "${1:-1}"
}
|
||
echo "Starting Spot Interruption w/o Host Networking Test for Node Termination Handler"

# Absolute, symlink-resolved directory containing this script; the local helm
# chart paths are resolved relative to it. `&&` makes a failed `cd` produce a
# non-zero status instead of silently reporting the caller's working directory.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)"

# Helm flags shared by every chart this test installs. Both inputs are
# optional, hence the `${VAR-}` form so `set -u` does not trip on them.
common_helm_args=()
if [[ "${TEST_WINDOWS-}" == "true" ]]; then
  common_helm_args+=(--set targetNodeOs="windows")
fi
if [[ -n "${NTH_WORKER_LABEL-}" ]]; then
  common_helm_args+=(--set nodeSelector."$NTH_WORKER_LABEL")
fi
|
||
# Install (or upgrade) the node termination handler chart into kube-system with
# spot interruption draining and node tainting enabled, scheduled-event
# draining disabled, and host networking turned off.
anth_helm_args=(upgrade --install "$CLUSTER_NAME-anth")
anth_helm_args+=("$SCRIPTPATH/../../config/helm/aws-node-termination-handler/")
anth_helm_args+=(--wait --force --namespace kube-system)
# Metadata endpoint: $INSTANCE_METADATA_URL when set and non-empty, otherwise
# built from $AEMM_URL and $IMDS_PORT.
anth_helm_args+=(--set instanceMetadataURL="${INSTANCE_METADATA_URL:-"http://$AEMM_URL:$IMDS_PORT"}")
anth_helm_args+=(
  --set image.repository="$NODE_TERMINATION_HANDLER_DOCKER_REPO"
  --set image.tag="$NODE_TERMINATION_HANDLER_DOCKER_TAG"
)
anth_helm_args+=(
  --set enableScheduledEventDraining="false"
  --set enableSpotInterruptionDraining="true"
  --set taintNode="true"
  --set useHostNetwork="false"
  --set tolerations=""
)

# The pull policy is only overridden when the caller provides one.
if [[ -n "${NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY-}" ]]; then
  anth_helm_args+=(--set image.pullPolicy="$NODE_TERMINATION_HANDLER_DOCKER_PULL_POLICY")
fi
# Only expand the shared flags when there are some, so an empty array is never
# expanded under `set -u`.
if ((${#common_helm_args[@]} > 0)); then
  anth_helm_args+=("${common_helm_args[@]}")
fi

# Trace just the helm invocation so the exact arguments show up in the log.
set -x
helm "${anth_helm_args[@]}"
set +x
|
||
# Install (or upgrade) the webhook-test-proxy chart into the default namespace.
emtp_helm_args=(upgrade --install "$CLUSTER_NAME-emtp")
emtp_helm_args+=("$SCRIPTPATH/../../config/helm/webhook-test-proxy/")
emtp_helm_args+=(--wait --force --namespace default)
emtp_helm_args+=(
  --set webhookTestProxy.image.repository="$WEBHOOK_DOCKER_REPO"
  --set webhookTestProxy.image.tag="$WEBHOOK_DOCKER_TAG"
)

# The pull policy is only overridden when the caller provides one.
if [[ -n "${WEBHOOK_DOCKER_PULL_POLICY-}" ]]; then
  emtp_helm_args+=(--set webhookTestProxy.image.pullPolicy="$WEBHOOK_DOCKER_PULL_POLICY")
fi
# Only expand the shared flags when there are some, so an empty array is never
# expanded under `set -u`.
if ((${#common_helm_args[@]} > 0)); then
  emtp_helm_args+=("${common_helm_args[@]}")
fi

# Trace just the helm invocation so the exact arguments show up in the log.
set -x
helm "${emtp_helm_args[@]}"
set +x
|
||
# Install (or upgrade) the chart located at $AEMM_DL_URL into the default
# namespace, serving on $IMDS_PORT.
aemm_helm_args=(upgrade --install "$CLUSTER_NAME-aemm" "$AEMM_DL_URL")
aemm_helm_args+=(--wait --namespace default)
aemm_helm_args+=(--set servicePort="$IMDS_PORT")
# Single toleration entry: effect NoSchedule, operator Exists.
aemm_helm_args+=(
  --set 'tolerations[0].effect=NoSchedule'
  --set 'tolerations[0].operator=Exists'
)
# NOTE(review): presumably selects the chart's spot interruption mode — confirm
# against the chart's documented values.
aemm_helm_args+=(--set arguments='{spot}')

# Only expand the shared flags when there are some, so an empty array is never
# expanded under `set -u`.
if ((${#common_helm_args[@]} > 0)); then
  aemm_helm_args+=("${common_helm_args[@]}")
fi

# NOTE(review): `retry` is not defined in this script; it is presumably supplied
# by the calling test harness — verify it is available before this runs.
set -x
retry 5 helm "${aemm_helm_args[@]}"
set +x
|
||
# Polling budget (iterations and seconds between them) used by both the setup
# loop here and the assertion loop later in the script.
TAINT_CHECK_CYCLES=15
TAINT_CHECK_SLEEP=15

# Setup phase: wait until the regular-pod-test deployment reports no
# unavailable replicas before asserting on the interruption handling.
deployed=0
for ((i = 1; i <= TAINT_CHECK_CYCLES; i++)); do
  # A failed kubectl call prints nothing, and an empty string compares equal to
  # 0 inside [[ -eq ]]. Require the call itself to succeed so an API error or a
  # not-yet-created deployment is retried instead of being reported as ready.
  # An empty result from a successful call still counts as 0 unavailable.
  if unavailable=$(kubectl get deployments regular-pod-test -o jsonpath='{.status.unavailableReplicas}') &&
    [[ "${unavailable:-0}" -eq 0 ]]; then
    echo "✅ Verified regular-pod-test pod was scheduled and started!"
    deployed=1
    break
  fi
  echo "Setup Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
  sleep "$TAINT_CHECK_SLEEP"
done

if [[ $deployed -eq 0 ]]; then
  echo "❌ regular-pod-test pod deployment failed"
  fail_and_exit 2
fi
|
||
# Assertion phase: the test passes once, in order, the worker node has been
# cordoned, the node carries the spot interruption taint, and the
# regular-pod-test deployment reports exactly one unavailable replica.
# Each milestone is latched so it is checked only until first observed.
cordoned=0
tainted=0
test_node=${TEST_NODE:-$CLUSTER_NAME-worker}
for ((i = 1; i <= TAINT_CHECK_CYCLES; i++)); do
  if [[ $cordoned -eq 0 ]] && kubectl get nodes "$test_node" | grep SchedulingDisabled >/dev/null; then
    echo "✅ Verified the worker node was cordoned!"
    cordoned=1
  fi

  # grep reads the whole stream (no -q): with `pipefail` on, an early-exiting
  # grep could make kubectl die on a closed pipe and fail the pipeline even
  # though the taint was found.
  if [[ $cordoned -eq 1 && $tainted -eq 0 ]] &&
    kubectl get nodes "$test_node" -o json | grep -F "aws-node-termination-handler/spot-itn" >/dev/null; then
    echo "✅ Verified the worker node was tainted!"
    tainted=1
  fi

  if [[ $tainted -eq 1 && $(kubectl get deployments regular-pod-test -o=jsonpath='{.status.unavailableReplicas}') -eq 1 ]]; then
    echo "✅ Verified the regular-pod-test pod was evicted!"
    echo "✅ Spot Interruption w/o Host Networking Test Passed $CLUSTER_NAME! ✅"
    exit 0
  fi
  echo "Assertion Loop $i/$TAINT_CHECK_CYCLES, sleeping for $TAINT_CHECK_SLEEP seconds"
  sleep "$TAINT_CHECK_SLEEP"
done

# Budget exhausted: report the first milestone that was never reached.
if [[ $cordoned -eq 0 ]]; then
  echo "❌ Worker node was not cordoned"
elif [[ $tainted -eq 0 ]]; then
  echo "❌ Worker node was not tainted"
else
  echo "❌ regular-pod-test pod was not evicted"
fi
fail_and_exit 1