ci: [CNI] Add restart node stage in the load test of the CNI pipeline #1916

Merged · 1 commit · Apr 21, 2023
33 changes: 32 additions & 1 deletion .pipelines/cni/cilium/cilium-cni-load-test.yaml
@@ -36,6 +36,8 @@ stages:
set -ex
az extension add --name aks-preview
make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}
kubectl apply -f hack/manifests/pod.yaml
kubectl apply -f hack/manifests/hostprocess.yaml
bash hack/scripts/scale_deployment.sh
- stage: validate_state
dependsOn: pod_deployment
@@ -56,8 +58,37 @@ stages:
name: "ValidateState"
displayName: "Validate State"
retryCountOnTaskFailure: 3
- stage: connectivity_tests
- stage: restart_nodes
dependsOn: validate_state
displayName: "Restart Node"
jobs:
- job: restart_nodes
steps:
- task: AzureCLI@1
inputs:
azureSubscription: $(TEST_SUB_SERVICE_CONNECTION)
scriptLocation: "inlineScript"
scriptType: "bash"
addSpnToEnvironment: true
inlineScript: |
echo "Scale up the pods and immediated restart the nodes"
make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}
make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION)
echo "Scaling the pods down to 100 per node"
bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -s
Member:
Is it scaling down to 1000 or 100? Also, is the number of runs 0?
Contributor (Author):
The script scales up and down by default; passing -n 0 tells it not to do that. The -s flag requests a single scale operation. Here we are scaling down to 1000 pods total (i.e. 100 pods per node) from 240 pods per node.
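For context, a usage sketch of the two scale_deployment.sh invocations in this stage, annotated per the flag semantics discussed above:

```bash
# Scale once (-s) to 1000 replicas (-u 1000), skipping the default
# scale up/down loop (-n 0):
bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -s

# After restarting the nodes, wait (-c) for the 1000 replicas to become
# available again before the validate step runs:
bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -c
```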

echo "Restarting the nodes"
vmss_name=$(az vmss list -g MC_${RESOURCE_GROUP}_${RESOURCE_GROUP}_$(LOCATION) --query "[].name" -o tsv)
make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP} REGION=$(LOCATION) VMSS_NAME=$vmss_name
bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -c
Member:
Why are we calling the scale script again?
Contributor (Author):
Similarly, -c makes the same script wait until the deployment of 1000 pods is done. I added a description of what the script can do; both checking the deployment status and scaling the pods are driven by flags passed to it. If we don't wait for the deployment to finish after the restart, the validate-state stage will fail, because a few pods are sometimes still being created.

name: "RestartNodes"
displayName: "Restart Nodes"
- script: |
bash hack/scripts/validate_state.sh
name: "ValidateState"
displayName: "Validate State"
retryCountOnTaskFailure: 3
- stage: connectivity_tests
dependsOn: restart_nodes
displayName: "Connectivity Tests"
jobs:
- job: cni_tests
69 changes: 53 additions & 16 deletions hack/scripts/scale_deployment.sh
@@ -1,12 +1,23 @@
#!/bin/bash
set -ex
kubectl apply -f hack/manifests/pod.yaml
kubectl apply -f hack/manifests/hostprocess.yaml
sleep 1m
total_num_of_run=4
total_num_of_run=5
scale_up_of_pods=2400
scale_down_pods=1
echo "Total num of run $total_num_of_run"

function help()
{
echo "Scale deployment based on the parameters."
echo "By default script will repeat the process of scale up/down"
echo
echo "Syntax: scale [-h|n|u|s|c|r]"
echo "options:"
echo "h Print this help."
echo "n Number of times the scale down/scale up task should run."
echo "u Number of pods to be scaled up."
echo "s Scale the pods single time. Accepted Values: true, default : false"
echo "c Check deployment status. Accepted Values: true, default : false"
echo
}

function check_deployment() {
available=-1
@@ -22,16 +33,42 @@ function check_deployment() {
echo "deployment complete."
}

for ((i=1; i <= total_num_of_run; i++))
do
echo "Current Run: $i"
echo "Scaling pods to : $scale_up_of_pods"
kubectl scale deployment container --replicas $scale_up_of_pods
check_deployment $scale_up_of_pods
echo "Scaling down pods to : $scale_down_pods"
kubectl scale deployment container --replicas $scale_down_pods
check_deployment $scale_down_pods
function scale_deployment()
{
desired_replicas=$1
kubectl scale deployment container --replicas "$desired_replicas"
echo "Scaled the deployment to $desired_replicas"
}

function repeat_deployment() {
echo "Total num of run $total_num_of_run"
for ((i=1; i <= total_num_of_run; i++))
do
echo "Current Run: $i"
echo "Scaling down pods to : $scale_down_pods"
scale_deployment $scale_down_pods
check_deployment $scale_down_pods
echo "Scaling pods to : $scale_up_of_pods"
scale_deployment "$scale_up_of_pods"
check_deployment "$scale_up_of_pods"
done
}

while getopts ":h:n:u:sc" option; do
case $option in
h) help
exit;;
n) total_num_of_run=$OPTARG;;
u) scale_up_of_pods=$OPTARG;;
s) echo "Scale deployment"
scale_deployment "$scale_up_of_pods";;
c) echo "Check deployment"
check_deployment "$scale_up_of_pods";;
\?) echo "Error: Invalid option"
exit;;
esac
done

kubectl scale deployment container --replicas $scale_up_of_pods
check_deployment $scale_up_of_pods
if [ "$total_num_of_run" -gt 0 ]; then
repeat_deployment
fi
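With no flags the script keeps its original load-test behavior, only parameterized; a quick sanity check of the defaults:

```bash
# Defaults: total_num_of_run=5, scale_up_of_pods=2400, scale_down_pods=1,
# so a bare run repeats the down-to-1 / up-to-2400 cycle five times:
bash hack/scripts/scale_deployment.sh
```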
8 changes: 7 additions & 1 deletion hack/scripts/validate_state.sh
@@ -15,6 +15,11 @@ do
node_name="${node##*/}"
node_ip=$(kubectl get "$node" -o jsonpath='{$.status.addresses[?(@.type=="InternalIP")].address}')
echo "Node internal ip: $node_ip"
echo "checking whether the node has any pods deployed to it or not"
pod_count=$(kubectl get pods -o wide | grep "$node_name" -c)
if [[ $pod_count -eq 0 ]]; then
continue
fi
privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}')
echo "privileged pod : $privileged_pod"
if [ "$privileged_pod" == '' ]; then
@@ -57,7 +62,8 @@
kubectl get pods -A -o wide
exit 1
fi
total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | .status.podIP)')

total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | select(.status.podIP != "" and .status.podIP != null)) | .status.podIP')
pod_ips=()
num_of_pod_ips=0
for ip in $total_pods_ips
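The jq change above guards against pods that do not yet have an IP assigned: with the old expression they would surface as null entries in total_pods_ips. A self-contained illustration with hypothetical input:

```bash
# One pod has an IP, one is still being created (no .status.podIP):
echo '{"items":[{"status":{"podIP":"10.241.0.4"}},{"status":{}}]}' |
  jq -r '(.items[] | select(.status.podIP != "" and .status.podIP != null)) | .status.podIP'
# Prints only 10.241.0.4; the IP-less pod no longer produces "null".
```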
4 changes: 4 additions & 0 deletions hack/swift/Makefile
@@ -56,6 +56,7 @@ vars: ## Show the input vars configured for the cluster commands
@echo OS_SKU=$(OS_SKU)
@echo VM_SIZE=$(VM_SIZE)
@echo NODE_COUNT=$(NODE_COUNT)
@echo VMSS_NAME=$(VMSS_NAME)


##@ SWIFT Infra
@@ -165,3 +166,6 @@ down: ## Delete the cluster
$(AZCLI) aks delete -g $(GROUP) -n $(CLUSTER) --yes
@$(MAKE) unset-kubeconf
@$(MAKE) rg-down

restart-vmss: ## Restarts the nodes in the cluster
$(AZCLI) vmss restart -g MC_${GROUP}_${CLUSTER}_${REGION} --name $(VMSS_NAME)
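The pipeline stage above exercises this target; reproduced here as a standalone usage sketch (RESOURCE_GROUP and LOCATION as in the pipeline):

```bash
# Find the node-pool VMSS in the AKS managed resource group, then
# restart it through the new Makefile target:
vmss_name=$(az vmss list -g MC_${RESOURCE_GROUP}_${RESOURCE_GROUP}_${LOCATION} --query "[].name" -o tsv)
make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP} REGION=${LOCATION} VMSS_NAME=$vmss_name
```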
1 change: 1 addition & 0 deletions hack/swift/README.md
@@ -29,4 +29,5 @@ AKS Clusters
swift-cilium-up Bring up a SWIFT Cilium cluster
swift-up Bring up a SWIFT AzCNI cluster
down Delete the cluster
restart-vmss Restarts the nodes in the cluster
```