From f1fa9d9e8a6d6402d103f52071cebf8a0a2e2467 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 25 Jul 2023 15:27:42 -0700 Subject: [PATCH 01/67] ci: adding scale pipeline for cilium --- .pipelines/cni/cilium/cilium-scale-test.yaml | 210 ++++++++++++++++++ .../cni/scale-test-templates/apache.yaml | 18 ++ hack/aks/Makefile | 3 + 3 files changed, 231 insertions(+) create mode 100644 .pipelines/cni/cilium/cilium-scale-test.yaml create mode 100644 .pipelines/cni/scale-test-templates/apache.yaml diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml new file mode 100644 index 0000000000..16aaeb3e61 --- /dev/null +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -0,0 +1,210 @@ +pr: none +trigger: none + +stages: + - stage: update_cilium_version + displayName: "Update Cilium Version and Restart Nodes" + jobs: + - job: update_version + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Redeploy all cilium components and update cilium version. Redeploy all to catch all changes between versions" + echo "deploy Cilium ConfigMap" + kubectl apply -f cilium/configmap.yaml + kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml + echo "install Cilium ${CILIUM_VERSION_TAG}" + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/daemonset.yaml | kubectl apply -f - + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - + kubectl apply -f test/integration/manifests/cilium/cilium-agent + kubectl apply -f test/integration/manifests/cilium/cilium-operator + vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) + make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name + kubectl get node + kubectl get pod -A + name: "UpdateCiliumVersion" + displayName: "Update Cilium Version" + - stage: scale_up_cluster + displayName: "Scale Up Cluster" + jobs: + - job: scale_up1000 + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Scaling to 1000 nodes" + az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count 1000 + name: "ScaleUp1000" + displayName: "Scale up to 1000 Nodes" + timeoutInMinutes: 0 + - stage: label_nodes + displayName: "Label Nodes for Testing" + jobs: + - job: label_nodes + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Set node label scale-test=true and connectivity-test=true for testing" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale + chmod +x label-nodes.sh + ./label-nodes.sh + name: "LabelNodes" + displayName: "Label all Nodes" + - stage: scale_cluster_deployments + displayName: "Scale deploments for Network Policies Check" + jobs: + - job: scale_deployments + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: 
$(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "scale deployment and to prep for network policies test" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale + chmod +x test-scale.sh + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=80 --num-network-policies=2000 --num-unapplied-network-policies=2000 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels + name: "scaling" + displayName: "Run scale script" + - stage: test_network_policies_connectivity + displayName: "Test Network Policies" + jobs: + - job: network_policies + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Run network policies test" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale/connectivity + chmod +x test-connectivity.sh + ./test-connectivity.sh --num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + name: "TestNetworkPolicies" + displayName: "Network Policies Scale Test" + timeoutInMinutes: 120 + - stage: scale_for_load_tests + displayName: "Scale for load tests" + jobs: + - job: deploy_service + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale + chmod +x test-scale.sh + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=0 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --num-real-nginx-deployments=1 + name: "TestLBServices" + displayName: "Scale for load tests" + - stage: benchmark_testing + displayName: "Run apachebench test" + jobs: + - job: ap_test + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Deploy apachebench pod and run test" + cd .pipelines/cni/scale-test-templates + kubectl apply -f apache.yaml + echo "wait for pod to become ready" + sleep 5s + kubectl get pod -owide + mkdir results + cd results + AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') + kubectl exec -it $AB_POD -- ab -n 100000 -c 3 -r http://real-svc-00001.scale-test/ >> "ab_100krequests_60kpods.log" + echo "running k top node" + kubectl top node >> "node_cpu_and_mem.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_cpu_and_mem.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/results/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. 
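+                    # ab flags, for reference: -n = total requests, -c = concurrency,
+                    # -r = keep going on socket receive errors (a few resets are
+                    # expected when hammering a single service at this scale)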
+ sudo cp ./results/* $ARTIFACT_DIR + name: "TestLBServices" + displayName: "Apachebench testing" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: results + pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + condition: always() + name: "PublishResults" + displayName: "Apachebench Result Artifacts" + - stage: scale_down_cluster + displayName: "Scale Down Cluster" + jobs: + - job: scale_down + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Scaling to 5 nodes" + vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) + make -C ./hack/aks scale-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name NODE_COUNT=5 + kubectl get node + name: "ScaleDown" + displayName: "Scale down to 5 Nodes" + timeoutInMinutes: 0 + - stage: delete_test_namespaces + displayName: "Delete Test Namespaces" + jobs: + - job: delete_namespaces + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "scale deployment and test network policies" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + kubectl delete ns scale-test + kubectl delete ns connectivity-test + kubectl get ns + name: "DeleteTestNamespaces" + displayName: "Delete Test Namespaces" diff --git a/.pipelines/cni/scale-test-templates/apache.yaml b/.pipelines/cni/scale-test-templates/apache.yaml new file mode 100644 index 0000000000..1a06512d4e --- /dev/null +++ b/.pipelines/cni/scale-test-templates/apache.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: apachebench +spec: + selector: + matchLabels: + app: apachebench + template: + metadata: + labels: + app: apachebench + spec: + containers: + - name: ubuntu-tools + image: tamilmani1989/ubuntu18-tools + command: ["/bin/sleep", "3650d"] + \ No newline at end of file diff --git a/hack/aks/Makefile b/hack/aks/Makefile index 14e2f1e0f7..8f753f2d04 100644 --- a/hack/aks/Makefile +++ b/hack/aks/Makefile @@ -235,3 +235,6 @@ down: ## Delete the cluster restart-vmss: ## Restarts the nodes in the cluster $(AZCLI) vmss restart -g MC_${GROUP}_${CLUSTER}_${REGION} --name $(VMSS_NAME) + +scale-vmss: ## Scales the nodes in the cluster + $(AZCLI) vmss scale -g MC_${GROUP}_${CLUSTER}_${REGION} --name $(VMSS_NAME) --new-capacity $(NODE_COUNT) From 6d796713b48629a1325dbd18eb85888997c879b9 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:12:42 -0700 Subject: [PATCH 02/67] change timeout --- .pipelines/cni/cilium/cilium-scale-test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 16aaeb3e61..eabfdde836 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -91,6 +91,7 @@ stages: displayName: "Test Network Policies" jobs: - job: network_policies + timeoutInMinutes: 120 steps: - task: AzureCLI@1 inputs: @@ -103,10 +104,9 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh 
--num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=40 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" displayName: "Network Policies Scale Test" - timeoutInMinutes: 120 - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -122,7 +122,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale chmod +x test-scale.sh - ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=0 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --num-real-nginx-deployments=1 + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --real-pod-type=nginx name: "TestLBServices" displayName: "Scale for load tests" - stage: benchmark_testing From f689cdf7ef498260ea45ef8d88497285407bc47c Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:17:13 -0700 Subject: [PATCH 03/67] change timeout --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index eabfdde836..0a727f407b 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -104,7 +104,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh --num-scale-pods-to-verify=40 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" displayName: "Network Policies Scale Test" - stage: scale_for_load_tests From b523941d6fbbd34b509e49330e46134b33b88bd3 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 14:37:10 -0700 Subject: [PATCH 04/67] update net policies --- .pipelines/cni/cilium/cilium-scale-test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 0a727f407b..75a35237b1 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -104,9 +104,9 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh --num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=60 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" - displayName: "Network Policies Scale Test" + displayName: "Network Policies 
Connectivity Test" - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -128,7 +128,7 @@ stages: - stage: benchmark_testing displayName: "Run apachebench test" jobs: - - job: ap_test + - job: apachebench_test steps: - task: AzureCLI@1 inputs: From 41a1b877254a44b11fcbf1f066b5ca5c766d206b Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 16:27:18 -0700 Subject: [PATCH 05/67] use acn build pool to avoid azp delays --- .pipelines/cni/cilium/cilium-scale-test.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 75a35237b1..0b7761409f 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -6,6 +6,8 @@ stages: displayName: "Update Cilium Version and Restart Nodes" jobs: - job: update_version + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -34,6 +36,8 @@ stages: displayName: "Scale Up Cluster" jobs: - job: scale_up1000 + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -53,6 +57,8 @@ stages: displayName: "Label Nodes for Testing" jobs: - job: label_nodes + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -72,6 +78,8 @@ stages: displayName: "Scale deploments for Network Policies Check" jobs: - job: scale_deployments + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -91,6 +99,8 @@ stages: displayName: "Test Network Policies" jobs: - job: network_policies + pool: + name: "$(BUILD_POOL_ACN)" timeoutInMinutes: 120 steps: - task: AzureCLI@1 @@ -111,6 +121,8 @@ stages: displayName: "Scale for load tests" jobs: - job: deploy_service + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -129,6 +141,8 @@ stages: displayName: "Run apachebench test" jobs: - job: apachebench_test + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -172,6 +186,8 @@ stages: displayName: "Scale Down Cluster" jobs: - job: scale_down + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -193,6 +209,8 @@ stages: displayName: "Delete Test Namespaces" jobs: - job: delete_namespaces + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: From b68f1695a272a38ad6bf4a2acafbbd2e1f34d524 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 27 Jul 2023 11:29:35 -0700 Subject: [PATCH 06/67] address comments -- update build pool var and remove timeouts from scale --- .pipelines/cni/cilium/cilium-scale-test.yaml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 0b7761409f..bb8b8e3f2c 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -7,7 +7,7 @@ stages: jobs: - job: update_version pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -37,7 +37,7 @@ stages: jobs: - job: scale_up1000 pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -52,13 +52,12 @@ stages: az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count 1000 name: "ScaleUp1000" displayName: "Scale up to 1000 Nodes" - timeoutInMinutes: 0 - stage: label_nodes displayName: "Label 
Nodes for Testing" jobs: - job: label_nodes pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -79,7 +78,7 @@ stages: jobs: - job: scale_deployments pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -100,7 +99,7 @@ stages: jobs: - job: network_policies pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" timeoutInMinutes: 120 steps: - task: AzureCLI@1 @@ -122,7 +121,7 @@ stages: jobs: - job: deploy_service pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -142,7 +141,7 @@ stages: jobs: - job: apachebench_test pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -187,7 +186,7 @@ stages: jobs: - job: scale_down pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -204,13 +203,12 @@ stages: kubectl get node name: "ScaleDown" displayName: "Scale down to 5 Nodes" - timeoutInMinutes: 0 - stage: delete_test_namespaces displayName: "Delete Test Namespaces" jobs: - job: delete_namespaces pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: From b6c38d7d083c1913bcb5434a1c20aeb20d38f3b7 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Fri, 28 Jul 2023 09:33:29 -0700 Subject: [PATCH 07/67] address comments -- set node/pod counts and test input as variables --- .pipelines/cni/cilium/cilium-scale-test.yaml | 20 +++++++++---------- .../manifests}/apache.yaml | 0 2 files changed, 10 insertions(+), 10 deletions(-) rename {.pipelines/cni/scale-test-templates => hack/manifests}/apache.yaml (100%) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index bb8b8e3f2c..550cea4531 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -48,10 +48,10 @@ stages: inlineScript: | set -ex az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} - echo "Scaling to 1000 nodes" - az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count 1000 - name: "ScaleUp1000" - displayName: "Scale up to 1000 Nodes" + echo "Scaling up nodes" + az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count ${NODE_COUNT_UP} + name: "ScaleUp" + displayName: "Scale up Nodes" - stage: label_nodes displayName: "Label Nodes for Testing" jobs: @@ -91,7 +91,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale chmod +x test-scale.sh - ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=80 --num-network-policies=2000 --num-unapplied-network-policies=2000 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 
--num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels name: "scaling" displayName: "Run scale script" - stage: test_network_policies_connectivity @@ -113,7 +113,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh --num-scale-pods-to-verify=60 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - stage: scale_for_load_tests @@ -133,7 +133,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale chmod +x test-scale.sh - ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --real-pod-type=nginx + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx name: "TestLBServices" displayName: "Scale for load tests" - stage: benchmark_testing @@ -152,7 +152,7 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "Deploy apachebench pod and run test" - cd .pipelines/cni/scale-test-templates + cd hack/manifests kubectl apply -f apache.yaml echo "wait for pod to become ready" sleep 5s @@ -160,7 +160,7 @@ stages: mkdir results cd results AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') - kubectl exec -it $AB_POD -- ab -n 100000 -c 3 -r http://real-svc-00001.scale-test/ >> "ab_100krequests_60kpods.log" + kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}krequests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "running k top node" kubectl top node >> "node_cpu_and_mem.log" echo "running k top pod" @@ -217,7 +217,7 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "scale deployment and test network policies" + echo "delete namespaces scale-test and connectivity-test" az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} kubectl delete ns scale-test kubectl delete ns connectivity-test diff --git a/.pipelines/cni/scale-test-templates/apache.yaml b/hack/manifests/apache.yaml similarity index 100% rename from .pipelines/cni/scale-test-templates/apache.yaml rename to hack/manifests/apache.yaml From a62049df6dc4624780c6b10ad225a470b8d40b9c Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Fri, 28 Jul 2023 12:15:30 -0700 Subject: [PATCH 08/67] remove all test resources --- .pipelines/cni/cilium/cilium-scale-test.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml 
b/.pipelines/cni/cilium/cilium-scale-test.yaml index 550cea4531..b0d29f413c 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -35,7 +35,7 @@ stages: - stage: scale_up_cluster displayName: "Scale Up Cluster" jobs: - - job: scale_up1000 + - job: scale_up pool: name: "$(BUILD_POOL_NAME_DEFAULT)" steps: @@ -160,7 +160,7 @@ stages: mkdir results cd results AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') - kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}krequests_${NUM_REAL_REPLICAS_LB}kpods.log" + kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "running k top node" kubectl top node >> "node_cpu_and_mem.log" echo "running k top pod" @@ -217,10 +217,12 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "delete namespaces scale-test and connectivity-test" + echo "delete test resources and namespaces" az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} kubectl delete ns scale-test kubectl delete ns connectivity-test kubectl get ns + cd hack/manifests + kubectl delete -f apache.yaml name: "DeleteTestNamespaces" displayName: "Delete Test Namespaces" From 44d56cb163b497c003b19d313bbb78aa13575e25 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Mon, 31 Jul 2023 10:10:52 -0700 Subject: [PATCH 09/67] check apachebench rollout status --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index b0d29f413c..3fb0e07f11 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -155,7 +155,7 @@ stages: cd hack/manifests kubectl apply -f apache.yaml echo "wait for pod to become ready" - sleep 5s + kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide mkdir results cd results From 6dd42144d8431bc7ce600cbbd30881dd3c6f7d90 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 09:34:05 -0700 Subject: [PATCH 10/67] collect more cpu/mem results --- .pipelines/cni/cilium/cilium-scale-test.yaml | 33 +++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 3fb0e07f11..71f1759bbe 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -87,8 +87,18 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "scale deployment and to prep for network policies test" az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "collect cpu and memory usage before scaling for network policies" + mkdir results + cd results + mkdir cpu_and_mem + cd cpu_and_mem + echo "running k top node" + kubectl top node >> "node_before_netpol_scale.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_before_netpol_scale.log" + cd ../.. 
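+              # kubectl top reads point-in-time samples from metrics-server
+              # (deployed by default on AKS), so these logs are rough baselines
+              # rather than precise measurements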
+ echo "scale deployment and to prep for network policies test" cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels @@ -114,6 +124,12 @@ stages: cd test/scale/connectivity chmod +x test-connectivity.sh ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + echo "collect cpu and mem results after connectivity tests" + cd results/cpu_and_mem + echo "running k top node" + kubectl top node >> "node_after_netpol_tests.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_after_netpol_tests.log" name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - stage: scale_for_load_tests @@ -131,6 +147,13 @@ stages: addSpnToEnvironment: true inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "collect cpu and mem results before scale for lb tests" + cd results/cpu_and_mem + echo "running k top node" + kubectl top node >> "node_before_lb_scale.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_before_lb_scale.log" + cd ../.. cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx @@ -157,14 +180,16 @@ stages: echo "wait for pod to become ready" kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide - mkdir results cd results AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" + echo "collect cpu and memory usage after apachebench tests" + cd cpu_and_mem echo "running k top node" - kubectl top node >> "node_cpu_and_mem.log" + kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" - kubectl top pod -A | grep cilium >> "pod_cpu_and_mem.log" + kubectl top pod -A | grep cilium >> "pod_after_lb_tests.log" + cd .. 
echo "Logs will be available as a build artifact" ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/results/ echo $ARTIFACT_DIR From eab0e41aa696d07c84785ee6c4abf767a1bb8d3c Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 12:58:47 -0700 Subject: [PATCH 11/67] add cns restart and fix artifact upload --- .pipelines/cni/cilium/cilium-scale-test.yaml | 97 ++++++++++++++++---- 1 file changed, 78 insertions(+), 19 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 71f1759bbe..7728968009 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -2,8 +2,8 @@ pr: none trigger: none stages: - - stage: update_cilium_version - displayName: "Update Cilium Version and Restart Nodes" + - stage: update_daemonset_versions + displayName: "Update Cilium + CNS Version and Restart Nodes" jobs: - job: update_version pool: @@ -26,12 +26,18 @@ stages: envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - kubectl apply -f test/integration/manifests/cilium/cilium-agent kubectl apply -f test/integration/manifests/cilium/cilium-operator + echo "Checking CNS version is up to date. Update if version is old." + CNS_IMAGE=${CNS_IMAGE} + if [[ grep -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2}' -neq $CNS_IMAGE ]]; then + sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml + kubectl apply -f test/integration/manifests/cns/daemonset.yaml + fi vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name kubectl get node kubectl get pod -A - name: "UpdateCiliumVersion" - displayName: "Update Cilium Version" + name: "UpdateCiliumandCNSVersion" + displayName: "Update Cilium and CNS Version" - stage: scale_up_cluster displayName: "Scale Up Cluster" jobs: @@ -89,21 +95,32 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "collect cpu and memory usage before scaling for network policies" - mkdir results - cd results - mkdir cpu_and_mem - cd cpu_and_mem + mkdir before_netpol_cpu_and_mem + cd before_netpol_cpu_and_mem echo "running k top node" kubectl top node >> "node_before_netpol_scale.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_before_netpol_scale.log" - cd ../.. + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/before_netpol_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. 
+ sudo cp ./before_netpol_cpu_and_mem/* $ARTIFACT_DIR echo "scale deployment and to prep for network policies test" cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels name: "scaling" displayName: "Run scale script" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: test_network_policies_connectivity displayName: "Test Network Policies" jobs: @@ -125,13 +142,28 @@ stages: chmod +x test-connectivity.sh ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 echo "collect cpu and mem results after connectivity tests" - cd results/cpu_and_mem + mkdir after_netpol_cpu_and_mem + cd after_netpol_cpu_and_mem echo "running k top node" kubectl top node >> "node_after_netpol_tests.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_after_netpol_tests.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. + sudo cp ./after_netpol_cpu_and_mem/* $ARTIFACT_DIR name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: after_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -148,17 +180,31 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "collect cpu and mem results before scale for lb tests" - cd results/cpu_and_mem + mkdir before_lb_cpu_and_mem + cd before_lb_cpu_and_mem echo "running k top node" kubectl top node >> "node_before_lb_scale.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_before_lb_scale.log" - cd ../.. + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. 
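+              # $(Build.ArtifactStagingDirectory) is the agent's staging folder and
+              # is purged between builds, so each stage recreates its own directory
+              # before handing it to PublishBuildArtifacts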
+ sudo cp ./before_lb_cpu_and_mem/* $ARTIFACT_DIR cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx name: "TestLBServices" displayName: "Scale for load tests" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: benchmark_testing displayName: "Run apachebench test" jobs: @@ -180,32 +226,45 @@ stages: echo "wait for pod to become ready" kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide - cd results + mkdir apachebench + cd apachebench AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "collect cpu and memory usage after apachebench tests" - cd cpu_and_mem + cd .. + mkdir after_lb_cpu_and_mem echo "running k top node" kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_after_lb_tests.log" cd .. echo "Logs will be available as a build artifact" - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/results/ + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/apachebench/ echo $ARTIFACT_DIR - sudo rm -rf $ARTIFACT_DIR + echo $ARTIFACT_DIR2 + sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR2 cd .. 
- sudo cp ./results/* $ARTIFACT_DIR + sudo cp ./apachebench/* $ARTIFACT_DIR + sudo cp ./after_lb_cpu_and_mem/* $ARTIFACT_DIR2 name: "TestLBServices" displayName: "Apachebench testing" - task: PublishBuildArtifacts@1 inputs: - artifactName: results - pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + artifactName: apachebench + pathtoPublish: "$(Build.ArtifactStagingDirectory)/apachebench" condition: always() name: "PublishResults" displayName: "Apachebench Result Artifacts" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: after_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" jobs: From 55833108f93105863eee7ef07d86f645d24b9761 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:00:26 -0700 Subject: [PATCH 12/67] add cns restart and fix artifact upload --- .pipelines/cni/cilium/cilium-scale-test.yaml | 36 ++++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 7728968009..8cc866011e 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -115,12 +115,12 @@ stages: name: "scaling" displayName: "Run scale script" - task: PublishBuildArtifacts@1 - inputs: - artifactName: before_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" - condition: always() - name: "PublishResults" - displayName: "Result Artifacts" + inputs: + artifactName: before_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: test_network_policies_connectivity displayName: "Test Network Policies" jobs: @@ -158,12 +158,12 @@ stages: name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - task: PublishBuildArtifacts@1 - inputs: - artifactName: after_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" - condition: always() - name: "PublishResults" - displayName: "Result Artifacts" + inputs: + artifactName: after_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -199,12 +199,12 @@ stages: name: "TestLBServices" displayName: "Scale for load tests" - task: PublishBuildArtifacts@1 - inputs: - artifactName: before_lb_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" - condition: always() - name: "PublishResults" - displayName: "Result Artifacts" + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: benchmark_testing displayName: "Run apachebench test" jobs: From ab33ad23d09e56e7346b9c41a1dd43e23024ff82 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:01:11 -0700 Subject: [PATCH 13/67] update name for artifact publishing --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 8cc866011e..076446bd68 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -263,7 +263,7 @@ stages: artifactName: after_lb_cpu_and_mem pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem" condition: always() - name: "PublishResults" + name: "PublishResults2" displayName: "Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" From d32054747564ea80d11a86914586e8f594e16559 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:12:20 -0700 Subject: [PATCH 14/67] update apachebench artifact collection --- .pipelines/cni/cilium/cilium-scale-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 076446bd68..e61e10b358 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -233,6 +233,7 @@ stages: echo "collect cpu and memory usage after apachebench tests" cd .. mkdir after_lb_cpu_and_mem + cd after_lb_cpu_and_mem echo "running k top node" kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" From 5c918f094ef0e969ff150ccd4c60a4656e9fc6f6 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:19:36 -0700 Subject: [PATCH 15/67] update apachebench artifact collection --- .pipelines/cni/cilium/cilium-scale-test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index e61e10b358..53683978a7 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -247,7 +247,6 @@ stages: sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR2 - cd .. sudo cp ./apachebench/* $ARTIFACT_DIR sudo cp ./after_lb_cpu_and_mem/* $ARTIFACT_DIR2 name: "TestLBServices" From 74c6a1d025a219ad170809de7891f4de860ca5a9 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:41:50 -0700 Subject: [PATCH 16/67] test cns version check --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 53683978a7..0c292f2098 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -28,7 +28,7 @@ stages: kubectl apply -f test/integration/manifests/cilium/cilium-operator echo "Checking CNS version is up to date. Update if version is old." 
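               # NOTE: [[ ]] has no -neq operator and -ne compares integers, so a
               # string-safe sketch of the check being iterated on here would be:
               #   current=$(grep -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{print $2}')
               #   if [ "$current" != "$CNS_IMAGE" ]; then ... fi
               # assuming, as the sed here does, that the image sits two lines under 'containers:'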
CNS_IMAGE=${CNS_IMAGE} - if [[ grep -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2}' -neq $CNS_IMAGE ]]; then + if [[ grep -q -e -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2 }' -ne $CNS_IMAGE ]]; then sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml kubectl apply -f test/integration/manifests/cns/daemonset.yaml fi From 0d10ded9f681b986f5d81c0639658c699b605922 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:46:52 -0700 Subject: [PATCH 17/67] change cns update --- .pipelines/cni/cilium/cilium-scale-test.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 0c292f2098..d3428f4ba6 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -26,12 +26,10 @@ stages: envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - kubectl apply -f test/integration/manifests/cilium/cilium-agent kubectl apply -f test/integration/manifests/cilium/cilium-operator - echo "Checking CNS version is up to date. Update if version is old." + echo "Keep CNS version up to date, grabbing pipeline parameter" CNS_IMAGE=${CNS_IMAGE} - if [[ grep -q -e -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2 }' -ne $CNS_IMAGE ]]; then - sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml - kubectl apply -f test/integration/manifests/cns/daemonset.yaml - fi + sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml + kubectl apply -f test/integration/manifests/cns/daemonset.yaml vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name kubectl get node From 57e335669a019f1496dc37d1162e8795e89138f3 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 16:23:16 -0700 Subject: [PATCH 18/67] update artifact directory name --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index d3428f4ba6..f1e5b241ae 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -115,7 +115,7 @@ stages: - task: PublishBuildArtifacts@1 inputs: artifactName: before_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_netpol_cpu_and_mem" condition: always() name: "PublishResults" displayName: "Result Artifacts" From 08960d7742dd4642a6a5498c6e3bdfd469f34e20 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:09:51 -0700 Subject: [PATCH 19/67] add netperf testing stage --- .pipelines/cni/cilium/cilium-scale-test.yaml | 45 ++++++++++++++++++++ hack/manifests/netperf-pod.yaml | 25 +++++++++++ hack/scripts/netperf.sh | 42 ++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 
hack/manifests/netperf-pod.yaml create mode 100755 hack/scripts/netperf.sh diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index f1e5b241ae..fbf315aee8 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -263,6 +263,50 @@ stages: condition: always() name: "PublishResults2" displayName: "Result Artifacts" + - stage: netperf_tests + displayName: "Run netperf tests" + jobs: + - job: netperf + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + mkdir netperf + cd netperf + kubectl apply -f test/integration/manifests/netperf-pod.yaml + kubectl rollout status deployment container6 --timeout=30s + sh hack/scripts/netperf.sh + cd .. + echo "collect cpu and mem results after netperf tests" + mkdir netperf_cpu_and_mem + cd netperf_cpu_and_mem + echo "running k top node" + kubectl top node >> "node_netperf.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_netperf.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. + sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR + name: "TestLBServices" + displayName: "Scale for load tests" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Netperf Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" jobs: @@ -306,5 +350,6 @@ stages: kubectl get ns cd hack/manifests kubectl delete -f apache.yaml + kubectl delete -f netperf-pod.yaml name: "DeleteTestNamespaces" displayName: "Delete Test Namespaces" diff --git a/hack/manifests/netperf-pod.yaml b/hack/manifests/netperf-pod.yaml new file mode 100644 index 0000000000..0f9cf38630 --- /dev/null +++ b/hack/manifests/netperf-pod.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: container6 + namespace: default +spec: + selector: + matchLabels: + app: container6 + replicas: 3 + template: # create pods using pod definition in this template + metadata: + # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is + # generated from the deployment name + labels: + app: container6 + spec: + containers: + - name: ubuntu + image: tamilmani1989/ubuntu18-tools + imagePullPolicy: Always + command: ["/bin/sh","-c"] + args: ["echo helloworld>hello.txt; php -S 0.0.0.0:9568"] + securityContext: + privileged: true diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh new file mode 100755 index 0000000000..c78649d1ba --- /dev/null +++ b/hack/scripts/netperf.sh @@ -0,0 +1,42 @@ +#!/bin/bash +for node in $(kubectl get nodes -o name); +do + echo "Current : $node" + node_name="${node##*/}" + echo "checking whether the node has any netperf pods deployed to it" + pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) + netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') + echo "netperf pod : $netperf_pod" + echo "pod_count: $pod_count" + + if [ $pod_count -gt 1 ]; then + 
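+    # two netperf pods share this node: the first becomes the netserver target
+    # (its IP is captured for the same-VM run) and the second is the same-VM
+    # client; a pod alone on another node becomes the cross-VM client below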
target_pod=$(echo $netperf_pod | cut -d" " -f 1) + target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') + same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) + kubectl exec -it $target_pod -- netserver + else + diff_vm_pod=$netperf_pod + fi +done + +#netperf on same vm pod +iteration=10 +while [ $iteration -ge 0 ] +do + echo "============ Iteration $iteration ===============" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "same_vm_iteration_$iteration.log" + echo "===============================" + sleep 5s + iteration=$((iteration-1)) +done + +#netperf on different vm pod +iteration=10 +while [ $iteration -ge 0 ] +do + echo "============ Iteration $iteration ===============" + kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "diff_vm_iteration_$iteration.log" + echo "===============================" + sleep 5s + iteration=$((iteration-1)) +done From 4dc5ce8e53de4ddf75634af728380bd1fbe4a673 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:17:38 -0700 Subject: [PATCH 20/67] give permissions to netperf script --- .pipelines/cni/cilium/cilium-scale-test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index fbf315aee8..7cd8d39baa 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -278,6 +278,7 @@ stages: addSpnToEnvironment: true inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + chmod +x hack/scripts/netperf.sh mkdir netperf cd netperf kubectl apply -f test/integration/manifests/netperf-pod.yaml @@ -299,7 +300,7 @@ stages: cd .. sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR name: "TestLBServices" - displayName: "Scale for load tests" + displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 inputs: artifactName: before_lb_cpu_and_mem From b563f81e7c5afda9f57d403d8ae12bff96646821 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:21:52 -0700 Subject: [PATCH 21/67] change netperf steps --- .pipelines/cni/cilium/cilium-scale-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 7cd8d39baa..a5c091c52f 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -279,10 +279,10 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} chmod +x hack/scripts/netperf.sh - mkdir netperf - cd netperf kubectl apply -f test/integration/manifests/netperf-pod.yaml kubectl rollout status deployment container6 --timeout=30s + mkdir netperf + cd netperf sh hack/scripts/netperf.sh cd .. 
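               # netperf's TCP_STREAM test reports bulk TCP throughput in 10^6 bits/sec;
               # each iteration in netperf.sh is a 30-second run (-l 30)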
echo "collect cpu and mem results after netperf tests" From f2d66609649c957edb4b4a3444d874146eea38fe Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:26:00 -0700 Subject: [PATCH 22/67] update path to netperf yaml --- .pipelines/cni/cilium/cilium-scale-test.yaml | 4 +--- hack/scripts/netperf.sh | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index a5c091c52f..9eaf4d4f7f 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -279,12 +279,10 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} chmod +x hack/scripts/netperf.sh - kubectl apply -f test/integration/manifests/netperf-pod.yaml + kubectl apply -f hack/manifests/netperf-pod.yaml kubectl rollout status deployment container6 --timeout=30s mkdir netperf - cd netperf sh hack/scripts/netperf.sh - cd .. echo "collect cpu and mem results after netperf tests" mkdir netperf_cpu_and_mem cd netperf_cpu_and_mem diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index c78649d1ba..eff419c315 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -24,7 +24,7 @@ iteration=10 while [ $iteration -ge 0 ] do echo "============ Iteration $iteration ===============" - kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "same_vm_iteration_$iteration.log" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" echo "===============================" sleep 5s iteration=$((iteration-1)) @@ -35,7 +35,7 @@ iteration=10 while [ $iteration -ge 0 ] do echo "============ Iteration $iteration ===============" - kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "diff_vm_iteration_$iteration.log" + kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/diff_vm_iteration_$iteration.log" echo "===============================" sleep 5s iteration=$((iteration-1)) From 9b619d3d246a37e8bb5857b467bafdd8a7e50f3e Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:37:04 -0700 Subject: [PATCH 23/67] change netperf deployment to 2 --- hack/manifests/netperf-pod.yaml | 2 +- hack/scripts/netperf.sh | 34 +++++---------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/hack/manifests/netperf-pod.yaml b/hack/manifests/netperf-pod.yaml index 0f9cf38630..5b5483cf13 100644 --- a/hack/manifests/netperf-pod.yaml +++ b/hack/manifests/netperf-pod.yaml @@ -7,7 +7,7 @@ spec: selector: matchLabels: app: container6 - replicas: 3 + replicas: 2 template: # create pods using pod definition in this template metadata: # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index eff419c315..2f140ecb17 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,34 +1,10 @@ #!/bin/bash -for node in $(kubectl get nodes -o name); -do - echo "Current : $node" - node_name="${node##*/}" - echo "checking whether the node has any netperf pods deployed to it" - pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) - netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') - echo "netperf pod : 
$netperf_pod" - echo "pod_count: $pod_count" - - if [ $pod_count -gt 1 ]; then - target_pod=$(echo $netperf_pod | cut -d" " -f 1) - target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') - same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) - kubectl exec -it $target_pod -- netserver - else - diff_vm_pod=$netperf_pod - fi -done -#netperf on same vm pod -iteration=10 -while [ $iteration -ge 0 ] -do - echo "============ Iteration $iteration ===============" - kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" - echo "===============================" - sleep 5s - iteration=$((iteration-1)) -done +netperf_pod=$(kubectl get pods -l app=container6 -o wide) +target_pod=$(echo $netperf_pod | cut -d" " -f 1) +target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') +diff_vm_pod=$netperf_pod +kubectl exec -it $target_pod -- netserver #netperf on different vm pod iteration=10 From 93042c6fd0472a93c44313ce93110adb73961c67 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:55:56 -0700 Subject: [PATCH 24/67] get correct pods in netperf script --- hack/scripts/netperf.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 2f140ecb17..7068643323 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,9 +1,8 @@ #!/bin/bash - -netperf_pod=$(kubectl get pods -l app=container6 -o wide) -target_pod=$(echo $netperf_pod | cut -d" " -f 1) +netperf_pod=$(kubectl get pods -l app=container6 -o wide | awk '{print $1}') +target_pod=$(echo $netperf_pod | cut -f 2 -d ' ') target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') -diff_vm_pod=$netperf_pod +diff_vm_pod=$(echo $netperf_pod | cut -f 3 -d ' ') kubectl exec -it $target_pod -- netserver #netperf on different vm pod From 50c66be07736b8dfca8a639ee2ab1bc9665b0538 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:18:38 -0700 Subject: [PATCH 25/67] publish netperf results --- .pipelines/cni/cilium/cilium-scale-test.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 9eaf4d4f7f..ae093b3a9b 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -292,11 +292,16 @@ stages: kubectl top pod -A | grep cilium >> "pod_netperf.log" echo "Logs will be available as a build artifact" ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem/ + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/netperf/ echo $ARTIFACT_DIR + echo $ARTIFACT_DIR2 sudo rm -rf $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR2 cd .. 
sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR + sudo cp ./netperf/* $ARTIFACT_DIR2 name: "TestLBServices" displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 @@ -306,6 +311,13 @@ stages: condition: always() name: "PublishResults" displayName: "Netperf Result Artifacts" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf" + condition: always() + name: "PublishNetperf" + displayName: "Netperf Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" jobs: From fe1e503b530214ebf716b95a12ed299b4c5842ad Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:31:50 -0700 Subject: [PATCH 26/67] publish netperf results --- .pipelines/cni/cilium/cilium-scale-test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index ae093b3a9b..7a1e5198e0 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -302,7 +302,7 @@ stages: cd .. sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR sudo cp ./netperf/* $ARTIFACT_DIR2 - name: "TestLBServices" + name: "NetperfIterations" displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 inputs: @@ -310,10 +310,10 @@ stages: pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem" condition: always() name: "PublishResults" - displayName: "Netperf Result Artifacts" + displayName: "Netperf cpu and mem Artifacts" - task: PublishBuildArtifacts@1 inputs: - artifactName: before_lb_cpu_and_mem + artifactName: netperf pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf" condition: always() name: "PublishNetperf" From cfe4dc63c8d80531956e9bc4a99b8ed2b6809a8d Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 10:30:01 -0700 Subject: [PATCH 27/67] add same vm test for netperf --- .pipelines/cni/cilium/cilium-scale-test.yaml | 1 + hack/manifests/netperf-pod.yaml | 5 ++- hack/scripts/netperf.sh | 36 +++++++++++++++++--- test/scale/label-nodes.sh | 7 ++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 7a1e5198e0..b566e8f3ee 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -267,6 +267,7 @@ stages: displayName: "Run netperf tests" jobs: - job: netperf + timeoutInMinutes: 120 pool: name: "$(BUILD_POOL_NAME_DEFAULT)" steps: diff --git a/hack/manifests/netperf-pod.yaml b/hack/manifests/netperf-pod.yaml index 5b5483cf13..74c93d882a 100644 --- a/hack/manifests/netperf-pod.yaml +++ b/hack/manifests/netperf-pod.yaml @@ -7,14 +7,17 @@ spec: selector: matchLabels: app: container6 - replicas: 2 + replicas: 3 template: # create pods using pod definition in this template metadata: # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is # generated from the deployment name labels: app: container6 + netperf: "true" spec: + nodeSelector: + netperf: "true" containers: - name: ubuntu image: tamilmani1989/ubuntu18-tools diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 7068643323..03bee48c14 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,9 +1,35 @@ #!/bin/bash -netperf_pod=$(kubectl get pods -l app=container6 -o wide | awk '{print 
$1}') -target_pod=$(echo $netperf_pod | cut -f 2 -d ' ') -target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') -diff_vm_pod=$(echo $netperf_pod | cut -f 3 -d ' ') -kubectl exec -it $target_pod -- netserver +# find the nodes with netperf pods and assign test vars +for node in $(kubectl get nodes -o name); +do + echo "Current : $node" + node_name="${node##*/}" + echo "checking whether the node has any netperf pods deployed to it" + pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) + netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') + echo "netperf pod : $netperf_pod" + echo "pod_count: $pod_count" + + if [ $pod_count -gt 1 ]; then + target_pod=$(echo $netperf_pod | cut -d" " -f 1) + target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') + same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) + kubectl exec -it $target_pod -- netserver + else + diff_vm_pod=$netperf_pod + fi +done + +#netperf on same vm pod +iteration=10 +while [ $iteration -ge 0 ] +do + echo "============ Iteration $iteration ===============" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" + echo "===============================" + sleep 5s + iteration=$((iteration-1)) +done #netperf on different vm pod iteration=10 diff --git a/test/scale/label-nodes.sh b/test/scale/label-nodes.sh index ec500f677b..3512dd07d7 100755 --- a/test/scale/label-nodes.sh +++ b/test/scale/label-nodes.sh @@ -1,6 +1,7 @@ #!/bin/sh cmd=$1 retries=0 +node_count=0 while [ $retries -lt 5 ]; do $cmd if [ $? -eq 0 ]; then @@ -17,11 +18,17 @@ fi for node in $(kubectl get nodes -o name); do + node_count=$((node_count + 1)) + echo $node_count echo "Current : $node" node_name="${node##*/}" echo "Apply label to the node" kubectl label node $node_name connectivity-test=true kubectl label node $node_name scale-test=true + if [ $node_count -lt 3 ]; then + kubectl label node $node_name netperf=true + echo "labeled node for netperf testing" + fi if [ $? 
-eq 0 ]; then echo "Label applied to the node" else From d3dcad8bf540c7299e103359f571cf57509b169d Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 13:46:02 -0700 Subject: [PATCH 28/67] netperf script to print pod values --- hack/scripts/netperf.sh | 43 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 03bee48c14..8bd2104d2c 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,25 +1,38 @@ #!/bin/bash # find the nodes with netperf pods and assign test vars +node_found=0 for node in $(kubectl get nodes -o name); do - echo "Current : $node" - node_name="${node##*/}" - echo "checking whether the node has any netperf pods deployed to it" - pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) - netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') - echo "netperf pod : $netperf_pod" - echo "pod_count: $pod_count" + while [ $node_found -lt 3 ] + do + echo "Current : $node" + node_name="${node##*/}" + echo "checking whether the node has any netperf pods deployed to it" + pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) + netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') + echo "netperf pod : $netperf_pod" + echo "pod_count: $pod_count" - if [ $pod_count -gt 1 ]; then - target_pod=$(echo $netperf_pod | cut -d" " -f 1) - target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') - same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) - kubectl exec -it $target_pod -- netserver - else - diff_vm_pod=$netperf_pod - fi + if [ $pod_count -gt 1 ]; then + target_pod=$(echo $netperf_pod | cut -d" " -f 1) + target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') + same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) + kubectl exec -it $target_pod -- netserver + node_found=$((node_found + 1)) + echo "Number of nodes found with netperf pod: $node_found" + else + diff_vm_pod=$netperf_pod + node_found=$((node_found + 1)) + echo "Number of nodes found with netperf pod: $node_found" + fi + done done +echo "target netperf pod: $target_pod" +echo "target netperf pod IP: $target_pod_ip" +echo "same vm pod: $same_vm_pod" +echo "different vm pod: $diff_vm_pod" + #netperf on same vm pod iteration=10 while [ $iteration -ge 0 ] From 4417a4b19374c10651394e342ac04e82e37ff39c Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:51:23 -0700 Subject: [PATCH 29/67] netperf find nodes logic --- hack/scripts/netperf.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 8bd2104d2c..d560d9f3a1 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -3,8 +3,7 @@ node_found=0 for node in $(kubectl get nodes -o name); do - while [ $node_found -lt 3 ] - do + if [ $node_found -lt 3 ]; then echo "Current : $node" node_name="${node##*/}" echo "checking whether the node has any netperf pods deployed to it" @@ -25,7 +24,7 @@ do node_found=$((node_found + 1)) echo "Number of nodes found with netperf pod: $node_found" fi - done + fi done echo "target netperf pod: $target_pod" From b783de2d17f0d862e2df6665183b3bc39c606212 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:15:30 -0700 
Subject: [PATCH 30/67] netperf find nodes logic --- hack/scripts/netperf.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index d560d9f3a1..6d1be52b4d 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -3,7 +3,7 @@ node_found=0 for node in $(kubectl get nodes -o name); do - if [ $node_found -lt 3 ]; then + if [ $node_found -lt 2 ]; then echo "Current : $node" node_name="${node##*/}" echo "checking whether the node has any netperf pods deployed to it" From 6528aea887296b0dd97611850cfed222964f9bef Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 25 Jul 2023 15:27:42 -0700 Subject: [PATCH 31/67] ci: adding scale pipeline for cilium --- .pipelines/cni/cilium/cilium-scale-test.yaml | 210 ++++++++++++++++++ .../cni/scale-test-templates/apache.yaml | 18 ++ hack/aks/Makefile | 3 + 3 files changed, 231 insertions(+) create mode 100644 .pipelines/cni/cilium/cilium-scale-test.yaml create mode 100644 .pipelines/cni/scale-test-templates/apache.yaml diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml new file mode 100644 index 0000000000..16aaeb3e61 --- /dev/null +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -0,0 +1,210 @@ +pr: none +trigger: none + +stages: + - stage: update_cilium_version + displayName: "Update Cilium Version and Restart Nodes" + jobs: + - job: update_version + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Redeploy all cilium components and update cilium version. 
Redeploy all to catch all changes between versions" + echo "deploy Cilium ConfigMap" + kubectl apply -f cilium/configmap.yaml + kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml + echo "install Cilium ${CILIUM_VERSION_TAG}" + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/daemonset.yaml | kubectl apply -f - + envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - + kubectl apply -f test/integration/manifests/cilium/cilium-agent + kubectl apply -f test/integration/manifests/cilium/cilium-operator + vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) + make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name + kubectl get node + kubectl get pod -A + name: "UpdateCiliumVersion" + displayName: "Update Cilium Version" + - stage: scale_up_cluster + displayName: "Scale Up Cluster" + jobs: + - job: scale_up1000 + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Scaling to 1000 nodes" + az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count 1000 + name: "ScaleUp1000" + displayName: "Scale up to 1000 Nodes" + timeoutInMinutes: 0 + - stage: label_nodes + displayName: "Label Nodes for Testing" + jobs: + - job: label_nodes + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Set node label scale-test=true and connectivity-test=true for testing" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale + chmod +x label-nodes.sh + ./label-nodes.sh + name: "LabelNodes" + displayName: "Label all Nodes" + - stage: scale_cluster_deployments + displayName: "Scale deploments for Network Policies Check" + jobs: + - job: scale_deployments + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "scale deployment and to prep for network policies test" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale + chmod +x test-scale.sh + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=80 --num-network-policies=2000 --num-unapplied-network-policies=2000 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels + name: "scaling" + displayName: "Run scale script" + - stage: test_network_policies_connectivity + displayName: "Test Network Policies" + jobs: + - job: network_policies + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "Run network policies test" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale/connectivity + chmod +x test-connectivity.sh + ./test-connectivity.sh --num-scale-pods-to-verify=80 
--max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + name: "TestNetworkPolicies" + displayName: "Network Policies Scale Test" + timeoutInMinutes: 120 + - stage: scale_for_load_tests + displayName: "Scale for load tests" + jobs: + - job: deploy_service + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + cd test/scale + chmod +x test-scale.sh + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=0 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --num-real-nginx-deployments=1 + name: "TestLBServices" + displayName: "Scale for load tests" + - stage: benchmark_testing + displayName: "Run apachebench test" + jobs: + - job: ap_test + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Deploy apachebench pod and run test" + cd .pipelines/cni/scale-test-templates + kubectl apply -f apache.yaml + echo "wait for pod to become ready" + sleep 5s + kubectl get pod -owide + mkdir results + cd results + AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') + kubectl exec -it $AB_POD -- ab -n 100000 -c 3 -r http://real-svc-00001.scale-test/ >> "ab_100krequests_60kpods.log" + echo "running k top node" + kubectl top node >> "node_cpu_and_mem.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_cpu_and_mem.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/results/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. 
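+ # For reference, the ab flags used above: -n is the total number of
+ # requests, -c the concurrency level, and -r keeps ab from exiting on
+ # socket receive errors, so the run survives the transient resets that
+ # are likely against a service backed by this many pods.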
+ sudo cp ./results/* $ARTIFACT_DIR + name: "TestLBServices" + displayName: "Apachebench testing" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: results + pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + condition: always() + name: "PublishResults" + displayName: "Apachebench Result Artifacts" + - stage: scale_down_cluster + displayName: "Scale Down Cluster" + jobs: + - job: scale_down + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + set -ex + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "Scaling to 5 nodes" + vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) + make -C ./hack/aks scale-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name NODE_COUNT=5 + kubectl get node + name: "ScaleDown" + displayName: "Scale down to 5 Nodes" + timeoutInMinutes: 0 + - stage: delete_test_namespaces + displayName: "Delete Test Namespaces" + jobs: + - job: delete_namespaces + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + echo "scale deployment and test network policies" + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + kubectl delete ns scale-test + kubectl delete ns connectivity-test + kubectl get ns + name: "DeleteTestNamespaces" + displayName: "Delete Test Namespaces" diff --git a/.pipelines/cni/scale-test-templates/apache.yaml b/.pipelines/cni/scale-test-templates/apache.yaml new file mode 100644 index 0000000000..1a06512d4e --- /dev/null +++ b/.pipelines/cni/scale-test-templates/apache.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: apachebench +spec: + selector: + matchLabels: + app: apachebench + template: + metadata: + labels: + app: apachebench + spec: + containers: + - name: ubuntu-tools + image: tamilmani1989/ubuntu18-tools + command: ["/bin/sleep", "3650d"] + \ No newline at end of file diff --git a/hack/aks/Makefile b/hack/aks/Makefile index 6091dc88a1..18291243ed 100644 --- a/hack/aks/Makefile +++ b/hack/aks/Makefile @@ -235,3 +235,6 @@ down: ## Delete the cluster restart-vmss: ## Restarts the nodes in the cluster $(AZCLI) vmss restart -g MC_${GROUP}_${CLUSTER}_${REGION} --name $(VMSS_NAME) + +scale-vmss: ## Scales the nodes in the cluster + $(AZCLI) vmss scale -g MC_${GROUP}_${CLUSTER}_${REGION} --name $(VMSS_NAME) --new-capacity $(NODE_COUNT) From 093abeb083fb5a76e429c37d3a85f2c15f14d86a Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:12:42 -0700 Subject: [PATCH 32/67] change timeout --- .pipelines/cni/cilium/cilium-scale-test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 16aaeb3e61..eabfdde836 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -91,6 +91,7 @@ stages: displayName: "Test Network Policies" jobs: - job: network_policies + timeoutInMinutes: 120 steps: - task: AzureCLI@1 inputs: @@ -103,10 +104,9 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh 
--num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=40 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" displayName: "Network Policies Scale Test" - timeoutInMinutes: 120 - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -122,7 +122,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale chmod +x test-scale.sh - ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=0 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --num-real-nginx-deployments=1 + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --real-pod-type=nginx name: "TestLBServices" displayName: "Scale for load tests" - stage: benchmark_testing From d3d6e1d0590090d0e1c4c2fc3ba0c28e201dc277 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:17:13 -0700 Subject: [PATCH 33/67] change timeout --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index eabfdde836..0a727f407b 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -104,7 +104,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh --num-scale-pods-to-verify=40 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" displayName: "Network Policies Scale Test" - stage: scale_for_load_tests From e0b12197ddfcff3cc10747309ba5008ba84f5ae1 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 14:37:10 -0700 Subject: [PATCH 34/67] update net policies --- .pipelines/cni/cilium/cilium-scale-test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 0a727f407b..75a35237b1 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -104,9 +104,9 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh --num-scale-pods-to-verify=80 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=60 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" - displayName: "Network Policies Scale Test" + displayName: "Network Policies 
Connectivity Test" - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -128,7 +128,7 @@ stages: - stage: benchmark_testing displayName: "Run apachebench test" jobs: - - job: ap_test + - job: apachebench_test steps: - task: AzureCLI@1 inputs: From 9243ebb85f02ccd43e21983ded3934ee5feec8a6 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 26 Jul 2023 16:27:18 -0700 Subject: [PATCH 35/67] use acn build pool to avoid azp delays --- .pipelines/cni/cilium/cilium-scale-test.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 75a35237b1..0b7761409f 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -6,6 +6,8 @@ stages: displayName: "Update Cilium Version and Restart Nodes" jobs: - job: update_version + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -34,6 +36,8 @@ stages: displayName: "Scale Up Cluster" jobs: - job: scale_up1000 + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -53,6 +57,8 @@ stages: displayName: "Label Nodes for Testing" jobs: - job: label_nodes + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -72,6 +78,8 @@ stages: displayName: "Scale deploments for Network Policies Check" jobs: - job: scale_deployments + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -91,6 +99,8 @@ stages: displayName: "Test Network Policies" jobs: - job: network_policies + pool: + name: "$(BUILD_POOL_ACN)" timeoutInMinutes: 120 steps: - task: AzureCLI@1 @@ -111,6 +121,8 @@ stages: displayName: "Scale for load tests" jobs: - job: deploy_service + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -129,6 +141,8 @@ stages: displayName: "Run apachebench test" jobs: - job: apachebench_test + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -172,6 +186,8 @@ stages: displayName: "Scale Down Cluster" jobs: - job: scale_down + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: @@ -193,6 +209,8 @@ stages: displayName: "Delete Test Namespaces" jobs: - job: delete_namespaces + pool: + name: "$(BUILD_POOL_ACN)" steps: - task: AzureCLI@1 inputs: From aa81448139e0c2862a64e50578fd86ccfc1455a2 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 27 Jul 2023 11:29:35 -0700 Subject: [PATCH 36/67] address comments -- update build pool var and remove timeouts from scale --- .pipelines/cni/cilium/cilium-scale-test.yaml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 0b7761409f..bb8b8e3f2c 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -7,7 +7,7 @@ stages: jobs: - job: update_version pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -37,7 +37,7 @@ stages: jobs: - job: scale_up1000 pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -52,13 +52,12 @@ stages: az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count 1000 name: "ScaleUp1000" displayName: "Scale up to 1000 Nodes" - timeoutInMinutes: 0 - stage: label_nodes displayName: "Label 
Nodes for Testing" jobs: - job: label_nodes pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -79,7 +78,7 @@ stages: jobs: - job: scale_deployments pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -100,7 +99,7 @@ stages: jobs: - job: network_policies pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" timeoutInMinutes: 120 steps: - task: AzureCLI@1 @@ -122,7 +121,7 @@ stages: jobs: - job: deploy_service pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -142,7 +141,7 @@ stages: jobs: - job: apachebench_test pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -187,7 +186,7 @@ stages: jobs: - job: scale_down pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: @@ -204,13 +203,12 @@ stages: kubectl get node name: "ScaleDown" displayName: "Scale down to 5 Nodes" - timeoutInMinutes: 0 - stage: delete_test_namespaces displayName: "Delete Test Namespaces" jobs: - job: delete_namespaces pool: - name: "$(BUILD_POOL_ACN)" + name: "$(BUILD_POOL_NAME_DEFAULT)" steps: - task: AzureCLI@1 inputs: From e0cc798bbd959c3b59e26d5177e21e1c0badf91e Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Fri, 28 Jul 2023 09:33:29 -0700 Subject: [PATCH 37/67] address comments -- set node/pod counts and test input as variables --- .pipelines/cni/cilium/cilium-scale-test.yaml | 20 +++++++++---------- .../manifests}/apache.yaml | 0 2 files changed, 10 insertions(+), 10 deletions(-) rename {.pipelines/cni/scale-test-templates => hack/manifests}/apache.yaml (100%) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index bb8b8e3f2c..550cea4531 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -48,10 +48,10 @@ stages: inlineScript: | set -ex az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} - echo "Scaling to 1000 nodes" - az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count 1000 - name: "ScaleUp1000" - displayName: "Scale up to 1000 Nodes" + echo "Scaling up nodes" + az aks nodepool scale --name nodepool1 --cluster-name ${CLUSTER} --resource-group ${CLUSTER} --node-count ${NODE_COUNT_UP} + name: "ScaleUp" + displayName: "Scale up Nodes" - stage: label_nodes displayName: "Label Nodes for Testing" jobs: @@ -91,7 +91,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale chmod +x test-scale.sh - ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=80 --num-network-policies=2000 --num-unapplied-network-policies=2000 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 
--num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels name: "scaling" displayName: "Run scale script" - stage: test_network_policies_connectivity @@ -113,7 +113,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale/connectivity chmod +x test-connectivity.sh - ./test-connectivity.sh --num-scale-pods-to-verify=60 --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - stage: scale_for_load_tests @@ -133,7 +133,7 @@ stages: az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} cd test/scale chmod +x test-scale.sh - ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=200 --num-real-deployments=1 --num-real-replicas=60 --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=1 --delete-labels --real-pod-type=nginx + ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx name: "TestLBServices" displayName: "Scale for load tests" - stage: benchmark_testing @@ -152,7 +152,7 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "Deploy apachebench pod and run test" - cd .pipelines/cni/scale-test-templates + cd hack/manifests kubectl apply -f apache.yaml echo "wait for pod to become ready" sleep 5s @@ -160,7 +160,7 @@ stages: mkdir results cd results AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') - kubectl exec -it $AB_POD -- ab -n 100000 -c 3 -r http://real-svc-00001.scale-test/ >> "ab_100krequests_60kpods.log" + kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}krequests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "running k top node" kubectl top node >> "node_cpu_and_mem.log" echo "running k top pod" @@ -217,7 +217,7 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "scale deployment and test network policies" + echo "delete namespaces scale-test and connectivity-test" az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} kubectl delete ns scale-test kubectl delete ns connectivity-test diff --git a/.pipelines/cni/scale-test-templates/apache.yaml b/hack/manifests/apache.yaml similarity index 100% rename from .pipelines/cni/scale-test-templates/apache.yaml rename to hack/manifests/apache.yaml From dee90f740d2ee69ab14e04c0a5e6bdb69fe87288 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Fri, 28 Jul 2023 12:15:30 -0700 Subject: [PATCH 38/67] remove all test resources --- .pipelines/cni/cilium/cilium-scale-test.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml 
b/.pipelines/cni/cilium/cilium-scale-test.yaml index 550cea4531..b0d29f413c 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -35,7 +35,7 @@ stages: - stage: scale_up_cluster displayName: "Scale Up Cluster" jobs: - - job: scale_up1000 + - job: scale_up pool: name: "$(BUILD_POOL_NAME_DEFAULT)" steps: @@ -160,7 +160,7 @@ stages: mkdir results cd results AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') - kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}krequests_${NUM_REAL_REPLICAS_LB}kpods.log" + kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "running k top node" kubectl top node >> "node_cpu_and_mem.log" echo "running k top pod" @@ -217,10 +217,12 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "delete namespaces scale-test and connectivity-test" + echo "delete test resources and namespaces" az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} kubectl delete ns scale-test kubectl delete ns connectivity-test kubectl get ns + cd hack/manifests + kubectl delete -f apache.yaml name: "DeleteTestNamespaces" displayName: "Delete Test Namespaces" From 6a8c5243bdbef96eaff3eb5cc1f8c05e92b26056 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Mon, 31 Jul 2023 10:10:52 -0700 Subject: [PATCH 39/67] check apachebench rollout status --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index b0d29f413c..3fb0e07f11 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -155,7 +155,7 @@ stages: cd hack/manifests kubectl apply -f apache.yaml echo "wait for pod to become ready" - sleep 5s + kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide mkdir results cd results From ce9825b4ba40629305f6ec2238ffe9cd47d5af36 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 09:34:05 -0700 Subject: [PATCH 40/67] collect more cpu/mem results --- .pipelines/cni/cilium/cilium-scale-test.yaml | 33 +++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 3fb0e07f11..71f1759bbe 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -87,8 +87,18 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - echo "scale deployment and to prep for network policies test" az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "collect cpu and memory usage before scaling for network policies" + mkdir results + cd results + mkdir cpu_and_mem + cd cpu_and_mem + echo "running k top node" + kubectl top node >> "node_before_netpol_scale.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_before_netpol_scale.log" + cd ../.. 
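+ # Note: kubectl top reads point-in-time samples from the Metrics API
+ # (metrics-server, which AKS deploys by default), so each log captures a
+ # single snapshot rather than an average over the stage. A hypothetical
+ # helper (sketch, not part of the pipeline) would keep the captures uniform:
+ #   snapshot_usage() {   # $1 = label, e.g. before_netpol_scale
+ #     kubectl top node >> "node_$1.log"
+ #     kubectl top pod -A | grep cilium >> "pod_$1.log"
+ #   }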
+ echo "scale deployment and to prep for network policies test" cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels @@ -114,6 +124,12 @@ stages: cd test/scale/connectivity chmod +x test-connectivity.sh ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 + echo "collect cpu and mem results after connectivity tests" + cd results/cpu_and_mem + echo "running k top node" + kubectl top node >> "node_after_netpol_tests.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_after_netpol_tests.log" name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - stage: scale_for_load_tests @@ -131,6 +147,13 @@ stages: addSpnToEnvironment: true inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + echo "collect cpu and mem results before scale for lb tests" + cd results/cpu_and_mem + echo "running k top node" + kubectl top node >> "node_before_lb_scale.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_before_lb_scale.log" + cd ../.. cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx @@ -157,14 +180,16 @@ stages: echo "wait for pod to become ready" kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide - mkdir results cd results AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" + echo "collect cpu and memory usage after apachebench tests" + cd cpu_and_mem echo "running k top node" - kubectl top node >> "node_cpu_and_mem.log" + kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" - kubectl top pod -A | grep cilium >> "pod_cpu_and_mem.log" + kubectl top pod -A | grep cilium >> "pod_after_lb_tests.log" + cd .. 
echo "Logs will be available as a build artifact" ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/results/ echo $ARTIFACT_DIR From 688d35d4496b75762c39a350631e68545c9ccae5 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 12:58:47 -0700 Subject: [PATCH 41/67] add cns restart and fix artifact upload --- .pipelines/cni/cilium/cilium-scale-test.yaml | 97 ++++++++++++++++---- 1 file changed, 78 insertions(+), 19 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 71f1759bbe..7728968009 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -2,8 +2,8 @@ pr: none trigger: none stages: - - stage: update_cilium_version - displayName: "Update Cilium Version and Restart Nodes" + - stage: update_daemonset_versions + displayName: "Update Cilium + CNS Version and Restart Nodes" jobs: - job: update_version pool: @@ -26,12 +26,18 @@ stages: envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - kubectl apply -f test/integration/manifests/cilium/cilium-agent kubectl apply -f test/integration/manifests/cilium/cilium-operator + echo "Checking CNS version is up to date. Update if version is old." + CNS_IMAGE=${CNS_IMAGE} + if [[ grep -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2}' -neq $CNS_IMAGE ]]; then + sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml + kubectl apply -f test/integration/manifests/cns/daemonset.yaml + fi vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name kubectl get node kubectl get pod -A - name: "UpdateCiliumVersion" - displayName: "Update Cilium Version" + name: "UpdateCiliumandCNSVersion" + displayName: "Update Cilium and CNS Version" - stage: scale_up_cluster displayName: "Scale Up Cluster" jobs: @@ -89,21 +95,32 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "collect cpu and memory usage before scaling for network policies" - mkdir results - cd results - mkdir cpu_and_mem - cd cpu_and_mem + mkdir before_netpol_cpu_and_mem + cd before_netpol_cpu_and_mem echo "running k top node" kubectl top node >> "node_before_netpol_scale.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_before_netpol_scale.log" - cd ../.. + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/before_netpol_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. 
+ sudo cp ./before_netpol_cpu_and_mem/* $ARTIFACT_DIR echo "scale deployment and to prep for network policies test" cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels name: "scaling" displayName: "Run scale script" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: test_network_policies_connectivity displayName: "Test Network Policies" jobs: @@ -125,13 +142,28 @@ stages: chmod +x test-connectivity.sh ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 echo "collect cpu and mem results after connectivity tests" - cd results/cpu_and_mem + mkdir after_netpol_cpu_and_mem + cd after_netpol_cpu_and_mem echo "running k top node" kubectl top node >> "node_after_netpol_tests.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_after_netpol_tests.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. + sudo cp ./after_netpol_cpu_and_mem/* $ARTIFACT_DIR name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: after_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -148,17 +180,31 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "collect cpu and mem results before scale for lb tests" - cd results/cpu_and_mem + mkdir before_lb_cpu_and_mem + cd before_lb_cpu_and_mem echo "running k top node" kubectl top node >> "node_before_lb_scale.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_before_lb_scale.log" - cd ../.. + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. 
+ sudo cp ./before_lb_cpu_and_mem/* $ARTIFACT_DIR cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx name: "TestLBServices" displayName: "Scale for load tests" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: benchmark_testing displayName: "Run apachebench test" jobs: @@ -180,32 +226,45 @@ stages: echo "wait for pod to become ready" kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide - cd results + mkdir apachebench + cd apachebench AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "collect cpu and memory usage after apachebench tests" - cd cpu_and_mem + cd .. + mkdir after_lb_cpu_and_mem echo "running k top node" kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_after_lb_tests.log" cd .. echo "Logs will be available as a build artifact" - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/results/ + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/apachebench/ echo $ARTIFACT_DIR - sudo rm -rf $ARTIFACT_DIR + echo $ARTIFACT_DIR2 + sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR2 cd .. 
- sudo cp ./results/* $ARTIFACT_DIR + sudo cp ./apachebench/* $ARTIFACT_DIR + sudo cp ./after_lb_cpu_and_mem/* $ARTIFACT_DIR2 name: "TestLBServices" displayName: "Apachebench testing" - task: PublishBuildArtifacts@1 inputs: - artifactName: results - pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + artifactName: apachebench + pathtoPublish: "$(Build.ArtifactStagingDirectory)/apachebench" condition: always() name: "PublishResults" displayName: "Apachebench Result Artifacts" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: after_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" jobs: From 45057975c92441261cbbe2b3cb35e1a6e524941f Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:00:26 -0700 Subject: [PATCH 42/67] add cns restart and fix artifact upload --- .pipelines/cni/cilium/cilium-scale-test.yaml | 36 ++++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 7728968009..8cc866011e 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -115,12 +115,12 @@ stages: name: "scaling" displayName: "Run scale script" - task: PublishBuildArtifacts@1 - inputs: - artifactName: before_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" - condition: always() - name: "PublishResults" - displayName: "Result Artifacts" + inputs: + artifactName: before_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: test_network_policies_connectivity displayName: "Test Network Policies" jobs: @@ -158,12 +158,12 @@ stages: name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - task: PublishBuildArtifacts@1 - inputs: - artifactName: after_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" - condition: always() - name: "PublishResults" - displayName: "Result Artifacts" + inputs: + artifactName: after_netpol_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: scale_for_load_tests displayName: "Scale for load tests" jobs: @@ -199,12 +199,12 @@ stages: name: "TestLBServices" displayName: "Scale for load tests" - task: PublishBuildArtifacts@1 - inputs: - artifactName: before_lb_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" - condition: always() - name: "PublishResults" - displayName: "Result Artifacts" + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Result Artifacts" - stage: benchmark_testing displayName: "Run apachebench test" jobs: From b51c478e33246aa3da4d6e04bcfcd2c93e78d7f5 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:01:11 -0700 Subject: [PATCH 43/67] update name for artifact publishing --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 8cc866011e..076446bd68 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -263,7 +263,7 @@ stages: artifactName: after_lb_cpu_and_mem pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem" condition: always() - name: "PublishResults" + name: "PublishResults2" displayName: "Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" From c4df0ad3d8b6736df437e39e40f8a3fab8832f53 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:12:20 -0700 Subject: [PATCH 44/67] update apachebench artifact collection --- .pipelines/cni/cilium/cilium-scale-test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 076446bd68..e61e10b358 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -233,6 +233,7 @@ stages: echo "collect cpu and memory usage after apachebench tests" cd .. mkdir after_lb_cpu_and_mem + cd after_lb_cpu_and_mem echo "running k top node" kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" From c6df1c644e274c09141ed6aea713fbbda47be885 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:19:36 -0700 Subject: [PATCH 45/67] update apachebench artifact collection --- .pipelines/cni/cilium/cilium-scale-test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index e61e10b358..53683978a7 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -247,7 +247,6 @@ stages: sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR2 - cd .. sudo cp ./apachebench/* $ARTIFACT_DIR sudo cp ./after_lb_cpu_and_mem/* $ARTIFACT_DIR2 name: "TestLBServices" From c34720a0e965d996fe74eb63fd10722c9a6e262f Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:41:50 -0700 Subject: [PATCH 46/67] test cns version check --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 53683978a7..0c292f2098 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -28,7 +28,7 @@ stages: kubectl apply -f test/integration/manifests/cilium/cilium-operator echo "Checking CNS version is up to date. Update if version is old." 
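# A note on the check reworked below: [[ ... ]] evaluates a test expression
# and cannot run a pipeline, -neq is not a test operator at all, and -ne
# compares integers, so neither version of this conditional can work as
# written. A working sketch (the variable name is illustrative) captures the
# pipeline output first:
#   current=$(grep -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{print $2}')
#   if [ "$current" != "$CNS_IMAGE" ]; then
#     sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml
#     kubectl apply -f test/integration/manifests/cns/daemonset.yaml
#   fi
# The ${CNS_IMAGE//\//\\/} expansion escapes the slashes in the image
# reference so it can be spliced into the sed s/// expression.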
CNS_IMAGE=${CNS_IMAGE} - if [[ grep -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2}' -neq $CNS_IMAGE ]]; then + if [[ grep -q -e -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2 }' -ne $CNS_IMAGE ]]; then sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml kubectl apply -f test/integration/manifests/cns/daemonset.yaml fi From 9589c45d077acd56244dc3877d4178d1ceff6251 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 13:46:52 -0700 Subject: [PATCH 47/67] change cns update --- .pipelines/cni/cilium/cilium-scale-test.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 0c292f2098..d3428f4ba6 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -26,12 +26,10 @@ stages: envsubst '${CILIUM_VERSION_TAG},${CILIUM_IMAGE_REGISTRY}' < test/integration/manifests/cilium/deployment.yaml | kubectl apply -f - kubectl apply -f test/integration/manifests/cilium/cilium-agent kubectl apply -f test/integration/manifests/cilium/cilium-operator - echo "Checking CNS version is up to date. Update if version is old." + echo "Keep CNS version up to date, grabbing pipeline parameter" CNS_IMAGE=${CNS_IMAGE} - if [[ grep -q -e -A2 'containers:' test/integration/manifests/cns/daemonset.yaml | tail -n1 | awk '{ print $2 }' -ne $CNS_IMAGE ]]; then - sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml - kubectl apply -f test/integration/manifests/cns/daemonset.yaml - fi + sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml + kubectl apply -f test/integration/manifests/cns/daemonset.yaml vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name kubectl get node From a91dd8b962ad6a113a03c959f1666b7fa6bdcc53 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Tue, 1 Aug 2023 16:23:16 -0700 Subject: [PATCH 48/67] update artifact directory name --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index d3428f4ba6..f1e5b241ae 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -115,7 +115,7 @@ stages: - task: PublishBuildArtifacts@1 inputs: artifactName: before_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/results" + pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_netpol_cpu_and_mem" condition: always() name: "PublishResults" displayName: "Result Artifacts" From 6c91845b345c98271e64737805b05e488288db54 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:09:51 -0700 Subject: [PATCH 49/67] add netperf testing stage --- .pipelines/cni/cilium/cilium-scale-test.yaml | 45 ++++++++++++++++++++ hack/manifests/netperf-pod.yaml | 25 +++++++++++ hack/scripts/netperf.sh | 42 ++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 
hack/manifests/netperf-pod.yaml create mode 100755 hack/scripts/netperf.sh diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index f1e5b241ae..fbf315aee8 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -263,6 +263,50 @@ stages: condition: always() name: "PublishResults2" displayName: "Result Artifacts" + - stage: netperf_tests + displayName: "Run netperf tests" + jobs: + - job: netperf + pool: + name: "$(BUILD_POOL_NAME_DEFAULT)" + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(TEST_SUB_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + mkdir netperf + cd netperf + kubectl apply -f test/integration/manifests/netperf-pod.yaml + kubectl rollout status deployment container6 --timeout=30s + sh hack/scripts/netperf.sh + cd .. + echo "collect cpu and mem results after netperf tests" + mkdir netperf_cpu_and_mem + cd netperf_cpu_and_mem + echo "running k top node" + kubectl top node >> "node_netperf.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_netperf.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem/ + echo $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR + cd .. + sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR + name: "TestLBServices" + displayName: "Scale for load tests" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem" + condition: always() + name: "PublishResults" + displayName: "Netperf Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" jobs: @@ -306,5 +350,6 @@ stages: kubectl get ns cd hack/manifests kubectl delete -f apache.yaml + kubectl delete -f netperf-pod.yaml name: "DeleteTestNamespaces" displayName: "Delete Test Namespaces" diff --git a/hack/manifests/netperf-pod.yaml b/hack/manifests/netperf-pod.yaml new file mode 100644 index 0000000000..0f9cf38630 --- /dev/null +++ b/hack/manifests/netperf-pod.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: container6 + namespace: default +spec: + selector: + matchLabels: + app: container6 + replicas: 3 + template: # create pods using pod definition in this template + metadata: + # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is + # generated from the deployment name + labels: + app: container6 + spec: + containers: + - name: ubuntu + image: tamilmani1989/ubuntu18-tools + imagePullPolicy: Always + command: ["/bin/sh","-c"] + args: ["echo helloworld>hello.txt; php -S 0.0.0.0:9568"] + securityContext: + privileged: true diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh new file mode 100755 index 0000000000..c78649d1ba --- /dev/null +++ b/hack/scripts/netperf.sh @@ -0,0 +1,42 @@ +#!/bin/bash +for node in $(kubectl get nodes -o name); +do + echo "Current : $node" + node_name="${node##*/}" + echo "checking whether the node has any netperf pods deployed to it" + pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) + netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') + echo "netperf pod : $netperf_pod" + echo "pod_count: $pod_count" + + if [ $pod_count -gt 1 ]; then + 
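# Two or more netperf pods landed on this node: the first pod becomes the
# netserver target (netserver is started inside it below) and the second
# serves as the same-VM client. A node carrying a single pod falls through
# to the else branch and supplies the cross-VM client instead.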
target_pod=$(echo $netperf_pod | cut -d" " -f 1) + target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') + same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) + kubectl exec -it $target_pod -- netserver + else + diff_vm_pod=$netperf_pod + fi +done + +#netperf on same vm pod +iteration=10 +while [ $iteration -ge 0 ] +do + echo "============ Iteration $iteration ===============" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "same_vm_iteration_$iteration.log" + echo "===============================" + sleep 5s + iteration=$((iteration-1)) +done + +#netperf on different vm pod +iteration=10 +while [ $iteration -ge 0 ] +do + echo "============ Iteration $iteration ===============" + kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "diff_vm_iteration_$iteration.log" + echo "===============================" + sleep 5s + iteration=$((iteration-1)) +done From 1e7007b0fb954f1280ea0b53df7166fe15d981c6 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:17:38 -0700 Subject: [PATCH 50/67] give permissions to netperf script --- .pipelines/cni/cilium/cilium-scale-test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index fbf315aee8..7cd8d39baa 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -278,6 +278,7 @@ stages: addSpnToEnvironment: true inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} + chmod +x hack/scripts/netperf.sh mkdir netperf cd netperf kubectl apply -f test/integration/manifests/netperf-pod.yaml @@ -299,7 +300,7 @@ stages: cd .. sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR name: "TestLBServices" - displayName: "Scale for load tests" + displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 inputs: artifactName: before_lb_cpu_and_mem From 4dbde07edf8e75affe3f9144395d04e66eb59a06 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:21:52 -0700 Subject: [PATCH 51/67] change netperf steps --- .pipelines/cni/cilium/cilium-scale-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 7cd8d39baa..a5c091c52f 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -279,10 +279,10 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} chmod +x hack/scripts/netperf.sh - mkdir netperf - cd netperf kubectl apply -f test/integration/manifests/netperf-pod.yaml kubectl rollout status deployment container6 --timeout=30s + mkdir netperf + cd netperf sh hack/scripts/netperf.sh cd .. 
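# For a one-off reproduction outside the pipeline (a sketch assuming the
# container6 deployment above is already rolled out), a single measurement
# uses the same commands the script wraps; SERVER and CLIENT are
# placeholders for two pods on different nodes:
#   kubectl exec -it $SERVER -- netserver
#   kubectl exec -it $CLIENT -- netperf -H $(kubectl get pod $SERVER -o jsonpath='{.status.podIP}') -l 30 -t TCP_STREAM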
echo "collect cpu and mem results after netperf tests" From e4dfd3d2146209394f68f08e37154d834b856951 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:26:00 -0700 Subject: [PATCH 52/67] update path to netperf yaml --- .pipelines/cni/cilium/cilium-scale-test.yaml | 4 +--- hack/scripts/netperf.sh | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index a5c091c52f..9eaf4d4f7f 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -279,12 +279,10 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} chmod +x hack/scripts/netperf.sh - kubectl apply -f test/integration/manifests/netperf-pod.yaml + kubectl apply -f hack/manifests/netperf-pod.yaml kubectl rollout status deployment container6 --timeout=30s mkdir netperf - cd netperf sh hack/scripts/netperf.sh - cd .. echo "collect cpu and mem results after netperf tests" mkdir netperf_cpu_and_mem cd netperf_cpu_and_mem diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index c78649d1ba..eff419c315 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -24,7 +24,7 @@ iteration=10 while [ $iteration -ge 0 ] do echo "============ Iteration $iteration ===============" - kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "same_vm_iteration_$iteration.log" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" echo "===============================" sleep 5s iteration=$((iteration-1)) @@ -35,7 +35,7 @@ iteration=10 while [ $iteration -ge 0 ] do echo "============ Iteration $iteration ===============" - kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "diff_vm_iteration_$iteration.log" + kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/diff_vm_iteration_$iteration.log" echo "===============================" sleep 5s iteration=$((iteration-1)) From 5570052b4540c86920b821e725a2d858f1afc879 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:37:04 -0700 Subject: [PATCH 53/67] change netperf deployment to 2 --- hack/manifests/netperf-pod.yaml | 2 +- hack/scripts/netperf.sh | 34 +++++---------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/hack/manifests/netperf-pod.yaml b/hack/manifests/netperf-pod.yaml index 0f9cf38630..5b5483cf13 100644 --- a/hack/manifests/netperf-pod.yaml +++ b/hack/manifests/netperf-pod.yaml @@ -7,7 +7,7 @@ spec: selector: matchLabels: app: container6 - replicas: 3 + replicas: 2 template: # create pods using pod definition in this template metadata: # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index eff419c315..2f140ecb17 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,34 +1,10 @@ #!/bin/bash -for node in $(kubectl get nodes -o name); -do - echo "Current : $node" - node_name="${node##*/}" - echo "checking whether the node has any netperf pods deployed to it" - pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) - netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') - echo "netperf pod : 
$netperf_pod" - echo "pod_count: $pod_count" - - if [ $pod_count -gt 1 ]; then - target_pod=$(echo $netperf_pod | cut -d" " -f 1) - target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') - same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) - kubectl exec -it $target_pod -- netserver - else - diff_vm_pod=$netperf_pod - fi -done -#netperf on same vm pod -iteration=10 -while [ $iteration -ge 0 ] -do - echo "============ Iteration $iteration ===============" - kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" - echo "===============================" - sleep 5s - iteration=$((iteration-1)) -done +netperf_pod=$(kubectl get pods -l app=container6 -o wide) +target_pod=$(echo $netperf_pod | cut -d" " -f 1) +target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') +diff_vm_pod=$netperf_pod +kubectl exec -it $target_pod -- netserver #netperf on different vm pod iteration=10 From fbda33a54e7d32f1e502d5ddac499799366750a1 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:55:56 -0700 Subject: [PATCH 54/67] get correct pods in netperf script --- hack/scripts/netperf.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 2f140ecb17..7068643323 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,9 +1,8 @@ #!/bin/bash - -netperf_pod=$(kubectl get pods -l app=container6 -o wide) -target_pod=$(echo $netperf_pod | cut -d" " -f 1) +netperf_pod=$(kubectl get pods -l app=container6 -o wide | awk '{print $1}') +target_pod=$(echo $netperf_pod | cut -f 2 -d ' ') target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') -diff_vm_pod=$netperf_pod +diff_vm_pod=$(echo $netperf_pod | cut -f 3 -d ' ') kubectl exec -it $target_pod -- netserver #netperf on different vm pod From 366ddb5711960355288b24df02f5f6d93b393f1a Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:18:38 -0700 Subject: [PATCH 55/67] publish netperf results --- .pipelines/cni/cilium/cilium-scale-test.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 9eaf4d4f7f..ae093b3a9b 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -292,11 +292,16 @@ stages: kubectl top pod -A | grep cilium >> "pod_netperf.log" echo "Logs will be available as a build artifact" ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem/ + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/netperf/ echo $ARTIFACT_DIR + echo $ARTIFACT_DIR2 sudo rm -rf $ARTIFACT_DIR + sudo rm -rf $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR + sudo mkdir $ARTIFACT_DIR2 cd .. 
sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR + sudo cp ./netperf/* $ARTIFACT_DIR2 name: "TestLBServices" displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 @@ -306,6 +311,13 @@ stages: condition: always() name: "PublishResults" displayName: "Netperf Result Artifacts" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: before_lb_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf" + condition: always() + name: "PublishNetperf" + displayName: "Netperf Result Artifacts" - stage: scale_down_cluster displayName: "Scale Down Cluster" jobs: From 2de4cd8b283a97d457b10ec597e71372945d2652 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 2 Aug 2023 15:31:50 -0700 Subject: [PATCH 56/67] publish netperf results --- .pipelines/cni/cilium/cilium-scale-test.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index ae093b3a9b..7a1e5198e0 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -302,7 +302,7 @@ stages: cd .. sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR sudo cp ./netperf/* $ARTIFACT_DIR2 - name: "TestLBServices" + name: "NetperfIterations" displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 inputs: @@ -310,10 +310,10 @@ stages: pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem" condition: always() name: "PublishResults" - displayName: "Netperf Result Artifacts" + displayName: "Netperf cpu and mem Artifacts" - task: PublishBuildArtifacts@1 inputs: - artifactName: before_lb_cpu_and_mem + artifactName: netperf pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf" condition: always() name: "PublishNetperf" From ebe6f54e9098f2b1cc10c6486a4b7b1079e62586 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 10:30:01 -0700 Subject: [PATCH 57/67] add same vm test for netperf --- .pipelines/cni/cilium/cilium-scale-test.yaml | 1 + hack/manifests/netperf-pod.yaml | 5 ++- hack/scripts/netperf.sh | 36 +++++++++++++++++--- test/scale/label-nodes.sh | 7 ++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 7a1e5198e0..b566e8f3ee 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -267,6 +267,7 @@ stages: displayName: "Run netperf tests" jobs: - job: netperf + timeoutInMinutes: 120 pool: name: "$(BUILD_POOL_NAME_DEFAULT)" steps: diff --git a/hack/manifests/netperf-pod.yaml b/hack/manifests/netperf-pod.yaml index 5b5483cf13..74c93d882a 100644 --- a/hack/manifests/netperf-pod.yaml +++ b/hack/manifests/netperf-pod.yaml @@ -7,14 +7,17 @@ spec: selector: matchLabels: app: container6 - replicas: 2 + replicas: 3 template: # create pods using pod definition in this template metadata: # unlike pod-nginx.yaml, the name is not included in the meta data as a unique name is # generated from the deployment name labels: app: container6 + netperf: "true" spec: + nodeSelector: + netperf: "true" containers: - name: ubuntu image: tamilmani1989/ubuntu18-tools diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 7068643323..03bee48c14 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,9 +1,35 @@ #!/bin/bash -netperf_pod=$(kubectl get pods -l app=container6 -o wide | awk '{print 
$1}') -target_pod=$(echo $netperf_pod | cut -f 2 -d ' ') -target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') -diff_vm_pod=$(echo $netperf_pod | cut -f 3 -d ' ') -kubectl exec -it $target_pod -- netserver +# find the nodes with netperf pods and assign test vars +for node in $(kubectl get nodes -o name); +do + echo "Current : $node" + node_name="${node##*/}" + echo "checking whether the node has any netperf pods deployed to it" + pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) + netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') + echo "netperf pod : $netperf_pod" + echo "pod_count: $pod_count" + + if [ $pod_count -gt 1 ]; then + target_pod=$(echo $netperf_pod | cut -d" " -f 1) + target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') + same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) + kubectl exec -it $target_pod -- netserver + else + diff_vm_pod=$netperf_pod + fi +done + +#netperf on same vm pod +iteration=10 +while [ $iteration -ge 0 ] +do + echo "============ Iteration $iteration ===============" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" + echo "===============================" + sleep 5s + iteration=$((iteration-1)) +done #netperf on different vm pod iteration=10 diff --git a/test/scale/label-nodes.sh b/test/scale/label-nodes.sh index ec500f677b..3512dd07d7 100755 --- a/test/scale/label-nodes.sh +++ b/test/scale/label-nodes.sh @@ -1,6 +1,7 @@ #!/bin/sh cmd=$1 retries=0 +node_count=0 while [ $retries -lt 5 ]; do $cmd if [ $? -eq 0 ]; then @@ -17,11 +18,17 @@ fi for node in $(kubectl get nodes -o name); do + node_count=$((node_count + 1)) + echo $node_count echo "Current : $node" node_name="${node##*/}" echo "Apply label to the node" kubectl label node $node_name connectivity-test=true kubectl label node $node_name scale-test=true + if [ $node_count -lt 3 ]; then + kubectl label node $node_name netperf=true + echo "labeled node for netperf testing" + fi if [ $? 
-eq 0 ]; then echo "Label applied to the node" else From 4da84e2fd8f6b29540ae230c09794d63a2989550 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 13:46:02 -0700 Subject: [PATCH 58/67] netperf script to print pod values --- hack/scripts/netperf.sh | 43 +++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 03bee48c14..8bd2104d2c 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -1,25 +1,38 @@ #!/bin/bash # find the nodes with netperf pods and assign test vars +node_found=0 for node in $(kubectl get nodes -o name); do - echo "Current : $node" - node_name="${node##*/}" - echo "checking whether the node has any netperf pods deployed to it" - pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) - netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') - echo "netperf pod : $netperf_pod" - echo "pod_count: $pod_count" + while [ $node_found -lt 3 ] + do + echo "Current : $node" + node_name="${node##*/}" + echo "checking whether the node has any netperf pods deployed to it" + pod_count=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" -c) + netperf_pod=$(kubectl get pods -l app=container6 -o wide | grep "$node_name" | awk '{print $1}') + echo "netperf pod : $netperf_pod" + echo "pod_count: $pod_count" - if [ $pod_count -gt 1 ]; then - target_pod=$(echo $netperf_pod | cut -d" " -f 1) - target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') - same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) - kubectl exec -it $target_pod -- netserver - else - diff_vm_pod=$netperf_pod - fi + if [ $pod_count -gt 1 ]; then + target_pod=$(echo $netperf_pod | cut -d" " -f 1) + target_pod_ip=$(kubectl get pod "$target_pod" -o jsonpath='{.status.podIP}') + same_vm_pod=$(echo $netperf_pod | cut -d" " -f 2) + kubectl exec -it $target_pod -- netserver + node_found=$((node_found + 1)) + echo "Number of nodes found with netperf pod: $node_found" + else + diff_vm_pod=$netperf_pod + node_found=$((node_found + 1)) + echo "Number of nodes found with netperf pod: $node_found" + fi + done done +echo "target netperf pod: $target_pod" +echo "target netperf pod IP: $target_pod_ip" +echo "same vm pod: $same_vm_pod" +echo "different vm pod: $diff_vm_pod" + #netperf on same vm pod iteration=10 while [ $iteration -ge 0 ] From 52ddeb9a604b2a6d2a94c5a65bffdc8852231cd6 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:51:23 -0700 Subject: [PATCH 59/67] netperf find nodes logic --- hack/scripts/netperf.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 8bd2104d2c..d560d9f3a1 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -3,8 +3,7 @@ node_found=0 for node in $(kubectl get nodes -o name); do - while [ $node_found -lt 3 ] - do + if [ $node_found -lt 3 ]; then echo "Current : $node" node_name="${node##*/}" echo "checking whether the node has any netperf pods deployed to it" @@ -25,7 +24,7 @@ do node_found=$((node_found + 1)) echo "Number of nodes found with netperf pod: $node_found" fi - done + fi done echo "target netperf pod: $target_pod" From e611c08e68b1cf87b9bfc6c3518fc166dd4ad092 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:15:30 -0700 
Subject: [PATCH 60/67] netperf find nodes logic --- hack/scripts/netperf.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index d560d9f3a1..6d1be52b4d 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -3,7 +3,7 @@ node_found=0 for node in $(kubectl get nodes -o name); do - if [ $node_found -lt 3 ]; then + if [ $node_found -lt 2 ]; then echo "Current : $node" node_name="${node##*/}" echo "checking whether the node has any netperf pods deployed to it" From a35ce9a6ac16cfab2be200fcc4b5b822a7071548 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 9 Aug 2023 16:07:43 -0700 Subject: [PATCH 61/67] address comments, add more cpu and mem collection, and rename artifact folders --- .pipelines/cni/cilium/cilium-scale-test.yaml | 125 +++++++++++++------ test/scale/label-nodes.sh | 1 - 2 files changed, 84 insertions(+), 42 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index b566e8f3ee..244e844f22 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -30,8 +30,9 @@ stages: CNS_IMAGE=${CNS_IMAGE} sed -i '/containers:/{n;n;s/\(image\).*/\1: '"${CNS_IMAGE//\//\\/}"'/}' test/integration/manifests/cns/daemonset.yaml kubectl apply -f test/integration/manifests/cns/daemonset.yaml - vmss_name=$(az vmss list -g MC_${CLUSTER}_${CLUSTER}_$(LOCATION) --query "[].name" -o tsv) - make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${CLUSTER} REGION=$(LOCATION) VMSS_NAME=$vmss_name + for val in $(az vmss list -g MC_${clusterName}_${clusterName}_$(REGION_AKS_CLUSTER_TEST) --query "[].name" -o tsv); do + make -C ./hack/aks restart-vmss AZCLI=az CLUSTER=${clusterName} REGION=$(REGION_AKS_CLUSTER_TEST) VMSS_NAME=${val} + done kubectl get node kubectl get pod -A name: "UpdateCiliumandCNSVersion" @@ -93,32 +94,53 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "collect cpu and memory usage before scaling for network policies" - mkdir before_netpol_cpu_and_mem - cd before_netpol_cpu_and_mem + mkdir test1_1_netpol_cpu_and_mem_before + cd test1_1_netpol_cpu_and_mem_before echo "running k top node" kubectl top node >> "node_before_netpol_scale.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_before_netpol_scale.log" echo "Logs will be available as a build artifact" - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/before_netpol_cpu_and_mem/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test1_1_netpol_cpu_and_mem_before/ echo $ARTIFACT_DIR sudo rm -rf $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR cd .. 
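# kubectl top is served by metrics-server (installed by default on AKS);
# immediately after a large scale-up the first query can fail or return
# incomplete data, so a retry loop is a reasonable hardening step (an
# assumption, not something this pipeline does):
#   until kubectl top node >> "node_before_netpol_scale.log"; do sleep 5; done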
- sudo cp ./before_netpol_cpu_and_mem/* $ARTIFACT_DIR + sudo cp ./test1_1_netpol_cpu_and_mem_before/* $ARTIFACT_DIR echo "scale deployment and to prep for network policies test" cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_NETPOL} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_NETPOL} --num-real-replicas=${NUM_REAL_REPLICAS_NETPOL} --num-network-policies=${APPLIED_NETPOL} --num-unapplied-network-policies=${UNAPPLIED_NETPOL} --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_NETPOL} --delete-labels + echo "collect cpu and mem results after scaling" + mkdir test1_2_netpol_cpu_and_mem_scale + cd test1_2_netpol_cpu_and_mem_scale + echo "running k top node" + kubectl top node >> "node_netpol_scale.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_netpol_scale.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/test1_2_netpol_cpu_and_mem_scale/ + echo $ARTIFACT_DIR2 + sudo rm -rf $ARTIFACT_DIR2 + sudo mkdir $ARTIFACT_DIR2 + cd .. + sudo cp ./test1_2_netpol_cpu_and_mem_scale/* $ARTIFACT_DIR2 name: "scaling" displayName: "Run scale script" - task: PublishBuildArtifacts@1 inputs: - artifactName: before_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_netpol_cpu_and_mem" + artifactName: test1_1_netpol_cpu_and_mem_before + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test1_1_netpol_cpu_and_mem_before" condition: always() name: "PublishResults" displayName: "Result Artifacts" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: test1_2_netpol_cpu_and_mem_scale + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test1_2_netpol_cpu_and_mem_scale" + condition: always() + name: "PublishResults2" + displayName: "Result Network Policies Artifacts" - stage: test_network_policies_connectivity displayName: "Test Network Policies" jobs: @@ -140,25 +162,25 @@ stages: chmod +x test-connectivity.sh ./test-connectivity.sh --num-scale-pods-to-verify=${NUM_SCALE_PODS_TO_VERIFY} --max-wait-for-initial-connectivity=600 --max-wait-after-adding-netpol=120 echo "collect cpu and mem results after connectivity tests" - mkdir after_netpol_cpu_and_mem - cd after_netpol_cpu_and_mem + mkdir test1_3_netpol_cpu_and_mem_after + cd test1_3_netpol_cpu_and_mem_after echo "running k top node" kubectl top node >> "node_after_netpol_tests.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_after_netpol_tests.log" echo "Logs will be available as a build artifact" - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test1_3_netpol_cpu_and_mem_after/ echo $ARTIFACT_DIR sudo rm -rf $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR cd .. 
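# Flag semantics for test-connectivity.sh above, inferred from the flag
# names (an assumption; the script itself is the authority):
#   --num-scale-pods-to-verify            how many of the scaled pods to probe
#   --max-wait-for-initial-connectivity   seconds allowed for baseline reachability
#   --max-wait-after-adding-netpol        seconds allowed for policy enforcement to settle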
- sudo cp ./after_netpol_cpu_and_mem/* $ARTIFACT_DIR + sudo cp ./test1_3_netpol_cpu_and_mem_after/* $ARTIFACT_DIR name: "TestNetworkPolicies" displayName: "Network Policies Connectivity Test" - task: PublishBuildArtifacts@1 inputs: - artifactName: after_netpol_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_netpol_cpu_and_mem" + artifactName: test1_3_netpol_cpu_and_mem_after + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test1_3_netpol_cpu_and_mem_after" condition: always() name: "PublishResults" displayName: "Result Artifacts" @@ -178,31 +200,52 @@ stages: inlineScript: | az aks get-credentials --resource-group ${CLUSTER} --name ${CLUSTER} echo "collect cpu and mem results before scale for lb tests" - mkdir before_lb_cpu_and_mem - cd before_lb_cpu_and_mem + mkdir test2_1_lb_cpu_and_mem_before + cd test2_1_lb_cpu_and_mem_before echo "running k top node" kubectl top node >> "node_before_lb_scale.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_before_lb_scale.log" echo "Logs will be available as a build artifact" - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test2_1_lb_cpu_and_mem_before/ echo $ARTIFACT_DIR sudo rm -rf $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR cd .. - sudo cp ./before_lb_cpu_and_mem/* $ARTIFACT_DIR + sudo cp ./test2_1_lb_cpu_and_mem_before/* $ARTIFACT_DIR cd test/scale chmod +x test-scale.sh ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx + echo "collect cpu and mem results after scaling" + mkdir test2_2_lb_cpu_and_mem_scale + cd scale_lb_cpu_and_mem + echo "running k top node" + kubectl top node >> "node_lb_scale.log" + echo "running k top pod" + kubectl top pod -A | grep cilium >> "pod_lb_scale.log" + echo "Logs will be available as a build artifact" + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/test2_2_lb_cpu_and_mem_scale/ + echo $ARTIFACT_DIR2 + sudo rm -rf $ARTIFACT_DIR2 + sudo mkdir $ARTIFACT_DIR2 + cd .. 
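# Note that mkdir and cd above name different directories
# (test2_2_lb_cpu_and_mem_scale vs scale_lb_cpu_and_mem), so the top logs
# land in the step's working directory instead; guarding the cd makes such
# slips fail fast, e.g.:
#   cd test2_2_lb_cpu_and_mem_scale || exit 1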
+ sudo cp ./test2_2_lb_cpu_and_mem_scale/* $ARTIFACT_DIR2 name: "TestLBServices" displayName: "Scale for load tests" - task: PublishBuildArtifacts@1 inputs: - artifactName: before_lb_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/before_lb_cpu_and_mem" + artifactName: test2_1_lb_cpu_and_mem_before + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test2_1_lb_cpu_and_mem_before" condition: always() name: "PublishResults" displayName: "Result Artifacts" + - task: PublishBuildArtifacts@1 + inputs: + artifactName: test2_2_lb_cpu_and_mem_scale + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test2_2_lb_cpu_and_mem_scale" + condition: always() + name: "PublishResults2" + displayName: "Result Scale Artifacts" - stage: benchmark_testing displayName: "Run apachebench test" jobs: @@ -224,42 +267,42 @@ stages: echo "wait for pod to become ready" kubectl rollout status deployment apachebench --timeout=30s kubectl get pod -owide - mkdir apachebench - cd apachebench + mkdir test2_apachebench + cd test2_apachebench AB_POD=$(kubectl get pod -l app=apachebench | grep apachebench | awk '{print $1}') kubectl exec -it $AB_POD -- ab -n ${AB_REQUESTS} -c ${AB_CONCURRENCY} -r http://real-svc-00001.scale-test/ >> "ab_${AB_REQUESTS}requests_${NUM_REAL_REPLICAS_LB}kpods.log" echo "collect cpu and memory usage after apachebench tests" cd .. - mkdir after_lb_cpu_and_mem - cd after_lb_cpu_and_mem + mkdir test2_3_lb_cpu_and_mem_after + cd test2_3_lb_cpu_and_mem_after echo "running k top node" kubectl top node >> "node_after_lb_tests.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_after_lb_tests.log" cd .. echo "Logs will be available as a build artifact" - ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem/ + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/test2_3_lb_cpu_and_mem_after/ ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/apachebench/ echo $ARTIFACT_DIR echo $ARTIFACT_DIR2 sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 sudo mkdir $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR2 - sudo cp ./apachebench/* $ARTIFACT_DIR - sudo cp ./after_lb_cpu_and_mem/* $ARTIFACT_DIR2 + sudo cp ./test2_apachebench/* $ARTIFACT_DIR + sudo cp ./test2_3_lb_cpu_and_mem_after/* $ARTIFACT_DIR2 name: "TestLBServices" displayName: "Apachebench testing" - task: PublishBuildArtifacts@1 inputs: - artifactName: apachebench - pathtoPublish: "$(Build.ArtifactStagingDirectory)/apachebench" + artifactName: test2_apachebench + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test2_apachebench" condition: always() name: "PublishResults" displayName: "Apachebench Result Artifacts" - task: PublishBuildArtifacts@1 inputs: - artifactName: after_lb_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/after_lb_cpu_and_mem" + artifactName: test2_3_lb_cpu_and_mem_after + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test2_3_lb_cpu_and_mem_after" condition: always() name: "PublishResults2" displayName: "Result Artifacts" @@ -285,15 +328,15 @@ stages: mkdir netperf sh hack/scripts/netperf.sh echo "collect cpu and mem results after netperf tests" - mkdir netperf_cpu_and_mem - cd netperf_cpu_and_mem + mkdir test3_netperf_cpu_and_mem + cd test3_netperf_cpu_and_mem echo "running k top node" kubectl top node >> "node_netperf.log" echo "running k top pod" kubectl top pod -A | grep cilium >> "pod_netperf.log" echo "Logs will be available as a build artifact" - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem/ - ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/netperf/ + 
ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test3_netperf_cpu_and_mem/ + ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/test3_netperf/ echo $ARTIFACT_DIR echo $ARTIFACT_DIR2 sudo rm -rf $ARTIFACT_DIR @@ -301,21 +344,21 @@ stages: sudo mkdir $ARTIFACT_DIR sudo mkdir $ARTIFACT_DIR2 cd .. - sudo cp ./netperf_cpu_and_mem/* $ARTIFACT_DIR - sudo cp ./netperf/* $ARTIFACT_DIR2 + sudo cp ./test3_netperf_cpu_and_mem/* $ARTIFACT_DIR + sudo cp ./test3_netperf/* $ARTIFACT_DIR2 name: "NetperfIterations" displayName: "Run Netperf tests" - task: PublishBuildArtifacts@1 inputs: - artifactName: before_lb_cpu_and_mem - pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf_cpu_and_mem" + artifactName: test3_netperf_cpu_and_mem + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test3_netperf_cpu_and_mem" condition: always() name: "PublishResults" displayName: "Netperf cpu and mem Artifacts" - task: PublishBuildArtifacts@1 inputs: - artifactName: netperf - pathtoPublish: "$(Build.ArtifactStagingDirectory)/netperf" + artifactName: test3_netperf + pathtoPublish: "$(Build.ArtifactStagingDirectory)/test3_netperf" condition: always() name: "PublishNetperf" displayName: "Netperf Result Artifacts" diff --git a/test/scale/label-nodes.sh b/test/scale/label-nodes.sh index 3512dd07d7..4c5428575e 100755 --- a/test/scale/label-nodes.sh +++ b/test/scale/label-nodes.sh @@ -34,5 +34,4 @@ do else echo "Error in applying label to the node $node_name" fi - sleep 2s done From cbfeab7b0217e50ebd6e7934b6d46728b00bef85 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Wed, 9 Aug 2023 16:12:26 -0700 Subject: [PATCH 62/67] address comments for labeling nodes --- test/scale/label-nodes.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/scale/label-nodes.sh b/test/scale/label-nodes.sh index 4c5428575e..abff086f81 100755 --- a/test/scale/label-nodes.sh +++ b/test/scale/label-nodes.sh @@ -34,4 +34,5 @@ do else echo "Error in applying label to the node $node_name" fi + sleep 1s done From ddfe7e989a06ae7695b9013b524670d92d1964d9 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:01:00 -0700 Subject: [PATCH 63/67] fix artifact directory name --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 244e844f22..c339d5eccd 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -218,7 +218,7 @@ stages: ./test-scale.sh --max-kwok-pods-per-node=0 --num-kwok-deployments=0 --num-kwok-replicas=0 --max-real-pods-per-node=${REAL_PODS_PER_NODE_LB} --num-real-deployments=${NUM_REAL_DEPLOYMENTS_LB} --num-real-replicas=${NUM_REAL_REPLICAS_LB} --num-network-policies=0 --num-unapplied-network-policies=0 --num-unique-labels-per-pod=0 --num-unique-labels-per-deployment=5 --num-shared-labels-per-pod=3 --num-real-services=${NUM_REAL_SVC_LB} --delete-labels --real-pod-type=nginx echo "collect cpu and mem results after scaling" mkdir test2_2_lb_cpu_and_mem_scale - cd scale_lb_cpu_and_mem + cd test2_2_lb_cpu_and_mem_scale echo "running k top node" kubectl top node >> "node_lb_scale.log" echo "running k top pod" From bea097566072f5527d46ca1112f3ae982a5abe19 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:32:51 -0700 Subject: [PATCH 64/67] fix artifact directory name apache --- 
.pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index c339d5eccd..bf27c35395 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -282,7 +282,7 @@ stages: cd .. echo "Logs will be available as a build artifact" ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/test2_3_lb_cpu_and_mem_after/ - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/apachebench/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/ test2_apachebench/ echo $ARTIFACT_DIR echo $ARTIFACT_DIR2 sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 From 4a8138753f67a2ad816ce240e66d27c70362e7cf Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:37:40 -0700 Subject: [PATCH 65/67] fix artifact directory name apache --- .pipelines/cni/cilium/cilium-scale-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index bf27c35395..9f2df27f20 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -282,7 +282,7 @@ stages: cd .. echo "Logs will be available as a build artifact" ARTIFACT_DIR2=$(Build.ArtifactStagingDirectory)/test2_3_lb_cpu_and_mem_after/ - ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/ test2_apachebench/ + ARTIFACT_DIR=$(Build.ArtifactStagingDirectory)/test2_apachebench/ echo $ARTIFACT_DIR echo $ARTIFACT_DIR2 sudo rm -rf $ARTIFACT_DIR $ARTIFACT_DIR2 From 45cbda92d6d3c9a65053e497d6abc82e1eb109e0 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:59:30 -0700 Subject: [PATCH 66/67] fix artifact directory name netperf --- .pipelines/cni/cilium/cilium-scale-test.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.pipelines/cni/cilium/cilium-scale-test.yaml b/.pipelines/cni/cilium/cilium-scale-test.yaml index 9f2df27f20..8ff45b570d 100644 --- a/.pipelines/cni/cilium/cilium-scale-test.yaml +++ b/.pipelines/cni/cilium/cilium-scale-test.yaml @@ -310,7 +310,6 @@ stages: displayName: "Run netperf tests" jobs: - job: netperf - timeoutInMinutes: 120 pool: name: "$(BUILD_POOL_NAME_DEFAULT)" steps: @@ -325,7 +324,7 @@ stages: chmod +x hack/scripts/netperf.sh kubectl apply -f hack/manifests/netperf-pod.yaml kubectl rollout status deployment container6 --timeout=30s - mkdir netperf + mkdir test3_netperf sh hack/scripts/netperf.sh echo "collect cpu and mem results after netperf tests" mkdir test3_netperf_cpu_and_mem From b2a08b1686d1d60351cebc752d9ba33b23a4aa47 Mon Sep 17 00:00:00 2001 From: camrynl <31013536+camrynl@users.noreply.github.com> Date: Thu, 10 Aug 2023 10:07:48 -0700 Subject: [PATCH 67/67] fix artifact directory name netperf in script --- hack/scripts/netperf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hack/scripts/netperf.sh b/hack/scripts/netperf.sh index 6d1be52b4d..95092134eb 100755 --- a/hack/scripts/netperf.sh +++ b/hack/scripts/netperf.sh @@ -37,7 +37,7 @@ iteration=10 while [ $iteration -ge 0 ] do echo "============ Iteration $iteration ===============" - kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/same_vm_iteration_$iteration.log" + kubectl exec -it $same_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> 
"test3_netperf/same_vm_iteration_$iteration.log" echo "===============================" sleep 5s iteration=$((iteration-1)) @@ -48,7 +48,7 @@ iteration=10 while [ $iteration -ge 0 ] do echo "============ Iteration $iteration ===============" - kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "netperf/diff_vm_iteration_$iteration.log" + kubectl exec -it $diff_vm_pod -- netperf -H $target_pod_ip -l 30 -t TCP_STREAM >> "test3_netperf/diff_vm_iteration_$iteration.log" echo "===============================" sleep 5s iteration=$((iteration-1))