From 7dd95d3faa4447fbd2c3f797ce91501222cd56d7 Mon Sep 17 00:00:00 2001 From: vipul-21 Date: Tue, 11 Apr 2023 15:06:45 -0700 Subject: [PATCH] ci: [CNI] Add restart node in stage in the load test of cni pipeline --- .../cni/cilium/cilium-cni-load-test.yaml | 33 ++++++++- hack/scripts/scale_deployment.sh | 69 ++++++++++++++----- hack/scripts/validate_state.sh | 8 ++- hack/swift/Makefile | 4 ++ hack/swift/README.md | 1 + 5 files changed, 97 insertions(+), 18 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-cni-load-test.yaml b/.pipelines/cni/cilium/cilium-cni-load-test.yaml index 86c32ee167..f0adb503ca 100644 --- a/.pipelines/cni/cilium/cilium-cni-load-test.yaml +++ b/.pipelines/cni/cilium/cilium-cni-load-test.yaml @@ -36,6 +36,8 @@ stages: set -ex az extension add --name aks-preview make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + kubectl apply -f hack/manifests/pod.yaml + kubectl apply -f hack/manifests/hostprocess.yaml bash hack/scripts/scale_deployment.sh - stage: validate_state dependsOn: pod_deployment @@ -56,8 +58,37 @@ stages: name: "ValidateState" displayName: "Validate State" retryCountOnTaskFailure: 3 - - stage: connectivity_tests + - stage: restart_nodes dependsOn: validate_state + displayName: "Restart Node" + jobs: + - job: restart_nodes + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Scale up the pods and immediated restart the nodes" + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) + echo "Scaling the pods down to 100 per node" + bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -s + echo "Restarting the nodes" + vmss_name=$(az vmss list -g MC_${RESOURCE_GROUP}_${RESOURCE_GROUP}_$(LOCATION) --query "[].name" -o tsv) + make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP} 
REGION=$(LOCATION) VMSS_NAME=$vmss_name + bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -c + name: "RestartNodes" + displayName: "Restart Nodes" + - script: | + bash hack/scripts/validate_state.sh + name: "ValidateState" + displayName: "Validate State" + retryCountOnTaskFailure: 3 + - stage: connectivity_tests + dependsOn: restart_nodes displayName: "Connectivity Tests" jobs: - job: cni_tests diff --git a/hack/scripts/scale_deployment.sh b/hack/scripts/scale_deployment.sh index 899e662d04..ceb715057d 100644 --- a/hack/scripts/scale_deployment.sh +++ b/hack/scripts/scale_deployment.sh @@ -1,12 +1,23 @@ #!/bin/bash set -ex -kubectl apply -f hack/manifests/pod.yaml -kubectl apply -f hack/manifests/hostprocess.yaml -sleep 1m -total_num_of_run=4 +total_num_of_run=5 scale_up_of_pods=2400 scale_down_pods=1 -echo "Total num of run $total_num_of_run" + +function help() +{ + echo "Scale deployment based on the parameters." + echo "By default script will repeat the process of scale up/down" + echo + echo "Syntax: scale [-h|n|u|s|c|r]" + echo "options:" + echo "h Print this help." + echo "n Number of times the scale down/scale up task should run." + echo "u Number of pods to be scaled up." + echo "s Scale the pods single time. Accepted Values: true, default : false" + echo "c Check deployment status. Accepted Values: true, default : false" + echo +} function check_deployment() { available=-1 @@ -22,16 +33,42 @@ function check_deployment() { echo "deployment complete." 
} -for ((i=1; i <= total_num_of_run; i++)) -do - echo "Current Run: $i" - echo "Scaling pods to : $scale_up_of_pods" - kubectl scale deployment container --replicas $scale_up_of_pods - check_deployment $scale_up_of_pods - echo "Scaling down pods to : $scale_down_pods" - kubectl scale deployment container --replicas $scale_down_pods - check_deployment $scale_down_pods +function scale_deployment() +{ + desired_replicas=$1 + kubectl scale deployment container --replicas "$desired_replicas" + echo "Scaled the deployment to $desired_replicas" +} + +function repeat_deployment() { + echo "Total num of run $total_num_of_run" + for ((i=1; i <= total_num_of_run; i++)) + do + echo "Current Run: $i" + echo "Scaling down pods to : $scale_down_pods" + scale_deployment $scale_down_pods + check_deployment $scale_down_pods + echo "Scaling pods to : $scale_up_of_pods" + scale_deployment "$scale_up_of_pods" + check_deployment "$scale_up_of_pods" + done +} + +while getopts ":hn:u:sc" option; do + case $option in + h) help + exit;; + n) total_num_of_run=$OPTARG;; + u) scale_up_of_pods=$OPTARG;; + s) echo "Scale deployment" + scale_deployment "$scale_up_of_pods";; + c) echo "Check deployment" + check_deployment "$scale_up_of_pods";; + \?) 
echo "Error: Invalid option" + exit;; + esac done -kubectl scale deployment container --replicas $scale_up_of_pods -check_deployment $scale_up_of_pods +if [ "$total_num_of_run" -gt 0 ]; then + repeat_deployment +fi diff --git a/hack/scripts/validate_state.sh b/hack/scripts/validate_state.sh index c9b8e49595..fa4e9cfc70 100644 --- a/hack/scripts/validate_state.sh +++ b/hack/scripts/validate_state.sh @@ -15,6 +15,11 @@ do node_name="${node##*/}" node_ip=$(kubectl get "$node" -o jsonpath='{$.status.addresses[?(@.type=="InternalIP")].address}') echo "Node internal ip: $node_ip" + echo "checking whether the node has any pods deployed to it or not" + pod_count=$(kubectl get pods -o wide | grep "$node_name" -c) + if [[ $pod_count -eq 0 ]]; then + continue + fi privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}') echo "privileged pod : $privileged_pod" if [ "$privileged_pod" == '' ]; then @@ -57,7 +62,8 @@ do kubectl get pods -A -o wide exit 1 fi - total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | .status.podIP)') + + total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | select(.status.podIP != "" and .status.podIP != null)) | .status.podIP') pod_ips=() num_of_pod_ips=0 for ip in $total_pods_ips diff --git a/hack/swift/Makefile b/hack/swift/Makefile index 964c6f019e..58d185d250 100644 --- a/hack/swift/Makefile +++ b/hack/swift/Makefile @@ -56,6 +56,7 @@ vars: ## Show the input vars configured for the cluster commands @echo OS_SKU=$(OS_SKU) @echo VM_SIZE=$(VM_SIZE) @echo NODE_COUNT=$(NODE_COUNT) + @echo VMSS_NAME=$(VMSS_NAME) ##@ SWIFT Infra @@ -165,3 +166,6 @@ down: ## Delete the cluster $(AZCLI) aks delete -g $(GROUP) -n $(CLUSTER) --yes @$(MAKE) unset-kubeconf @$(MAKE) rg-down + +restart-vmss: ## Restarts the nodes in the cluster + $(AZCLI) vmss restart -g MC_${GROUP}_${CLUSTER}_${REGION} --name $(VMSS_NAME) diff --git a/hack/swift/README.md b/hack/swift/README.md index 
96a4f2dc85..40069d88a3 100644 --- a/hack/swift/README.md +++ b/hack/swift/README.md @@ -29,4 +29,5 @@ AKS Clusters swift-cilium-up Bring up a SWIFT Cilium cluster swift-up Bring up a SWIFT AzCNI cluster down Delete the cluster + restart-vmss Restart the nodes of the cluster ```