From 813525c176ace79a7348eeda4c7bfee5a80001a5 Mon Sep 17 00:00:00 2001 From: vipul-21 Date: Tue, 6 Jun 2023 10:47:00 -0700 Subject: [PATCH] CI:[CNI] Replace the bash scripts with the golang test cases --- .../cni/cilium/cilium-cni-load-test.yaml | 37 +-- .../cilium-overlay-e2e-step-template.yaml | 5 +- .../cilium/cilium-e2e-step-template.yaml | 5 +- Makefile | 6 + hack/scripts/scale_deployment.sh | 74 ------ hack/scripts/validate_state.sh | 161 ------------ test/integration/k8s_test.go | 24 +- test/integration/load/load_test.go | 161 ++++++++++++ .../manifests/load/noop-deployment.yaml | 11 +- .../manifests/load/privileged-daemonset.yaml | 0 test/integration/setup_test.go | 33 +-- .../k8sutils}/label.go | 2 +- .../k8sutils/utils.go} | 130 ++++++++-- .../k8sutils/utils_create.go} | 50 +++- .../k8sutils/utils_delete.go} | 15 +- test/internal/k8sutils/utils_get.go | 42 ++++ .../k8sutils/utils_parse.go} | 8 +- test/{integration => internal}/retry/retry.go | 2 - test/validate/client.go | 38 +++ test/validate/linux_validate.go | 232 ++++++++++++++++++ test/validate/utils.go | 39 +++ 21 files changed, 741 insertions(+), 334 deletions(-) delete mode 100644 hack/scripts/scale_deployment.sh delete mode 100755 hack/scripts/validate_state.sh create mode 100644 test/integration/load/load_test.go rename hack/manifests/pod.yaml => test/integration/manifests/load/noop-deployment.yaml (64%) rename hack/manifests/hostprocess.yaml => test/integration/manifests/load/privileged-daemonset.yaml (100%) rename test/{integration => internal/k8sutils}/label.go (97%) rename test/{integration/utils_test.go => internal/k8sutils/utils.go} (58%) rename test/{integration/utils_create_test.go => internal/k8sutils/utils_create.go} (73%) rename test/{integration/utils_delete_test.go => internal/k8sutils/utils_delete.go} (64%) create mode 100644 test/internal/k8sutils/utils_get.go rename test/{integration/utils_parse_test.go => internal/k8sutils/utils_parse.go} (87%) rename test/{integration => internal}/retry/retry.go (96%) create mode 100644 test/validate/client.go create mode 100644 test/validate/linux_validate.go create mode 100644 test/validate/utils.go diff --git a/.pipelines/cni/cilium/cilium-cni-load-test.yaml b/.pipelines/cni/cilium/cilium-cni-load-test.yaml index 330556aa2be..904aca83537 100644 --- a/.pipelines/cni/cilium/cilium-cni-load-test.yaml +++ b/.pipelines/cni/cilium/cilium-cni-load-test.yaml @@ -16,7 +16,7 @@ stages: inlineScript: | set -ex make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) - make -C ./hack/swift overlay-no-kube-proxy-up AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP} NODE_COUNT=10 VM_SIZE=Standard_DS4_v2 + make -C ./hack/swift overlay-no-kube-proxy-up AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP}-$(make revision) NODE_COUNT=10 VM_SIZE=Standard_DS4_v2 name: "CreateAksCluster" displayName: "Create AKS Cluster" - stage: install_cilium @@ -35,7 +35,7 @@ stages: inlineScript: | set -ex az extension add --name aks-preview - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) ls -lah pwd kubectl cluster-info @@ -72,10 +72,9 @@ stages: inlineScript: | set -ex az extension add --name aks-preview - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} - kubectl apply -f hack/manifests/pod.yaml - kubectl apply -f hack/manifests/hostprocess.yaml - bash hack/scripts/scale_deployment.sh + make -C ./hack/swift set-kubeconf 
AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) + cd test/integration/load + go test -timeout 30m -tags load -run ^TestLoad$ -tags=load -iterations=4 -scaleup=2400 - stage: validate_state dependsOn: pod_deployment displayName: "Validate State" @@ -89,9 +88,9 @@ stages: scriptType: "bash" addSpnToEnvironment: true inlineScript: | - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) kubectl get pods -A - bash hack/scripts/validate_state.sh + make test-validate-state name: "ValidateState" displayName: "Validate State" retryCountOnTaskFailure: 3 @@ -109,18 +108,22 @@ stages: addSpnToEnvironment: true inlineScript: | echo "Scale up the pods and immediated restart the nodes" - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) + cd test/integration/load echo "Scaling the pods down to 100 per node" - bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -s + go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -scaleup=1000 -skip-wait=true + cd ../../../ echo "Restarting the nodes" vmss_name=$(az vmss list -g MC_${RESOURCE_GROUP}_${RESOURCE_GROUP}_$(LOCATION) --query "[].name" -o tsv) - make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP} REGION=$(LOCATION) VMSS_NAME=$vmss_name - bash ./hack/scripts/scale_deployment.sh -n 0 -u 1000 -c + make -C ./hack/swift restart-vmss AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) REGION=$(LOCATION) VMSS_NAME=$vmss_name + cd test/integration/load + go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas=1000 name: "RestartNodes" displayName: "Restart Nodes" - script: | - bash hack/scripts/validate_state.sh -r true + export RESTART_CASE=true + make test-validate-state name: "ValidateState" displayName: "Validate State" retryCountOnTaskFailure: 3 @@ -148,11 +151,11 @@ stages: addSpnToEnvironment: true inlineScript: | set -ex - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) name: "GetCluster" displayName: "Get AKS Cluster" - script: | - kubectl delete deployment container -n default + kubectl delete ns load-test cilium connectivity test retryCountOnTaskFailure: 6 name: "CiliumConnectivityTests" @@ -175,9 +178,9 @@ stages: if [ "$(DELETE_RESOURCES)" ] then echo "Deleting Cluster and resource group" - make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift set-kubeconf AZCLI=az CLUSTER=${RESOURCE_GROUP}-$(make revision) make -C ./hack/swift azcfg AZCLI=az REGION=$(LOCATION) - make -C ./hack/swift down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP} + make -C ./hack/swift down AZCLI=az REGION=$(LOCATION) SUB=$(SUBSCRIPTION_ID) CLUSTER=${RESOURCE_GROUP}-$(make revision) echo "Cluster and resources down" else echo "Deletion of resources is False" diff --git a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml index 3f288524727..016dc73a419 100644 --- a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml +++ b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-step-template.yaml @@ -131,13 +131,12 @@ steps: - script: 
| echo "validate pod IP assignment and check systemd-networkd restart" - kubectl apply -f hack/manifests/hostprocess.yaml kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state echo "delete cilium connectivity test resources and re-validate state" kubectl delete ns cilium-test kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state name: "validatePods" displayName: "Validate Pods" diff --git a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml index bf67c7d2163..5c390203105 100644 --- a/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml +++ b/.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml @@ -126,13 +126,12 @@ steps: - script: | echo "validate pod IP assignment and check systemd-networkd restart" - kubectl apply -f hack/manifests/hostprocess.yaml kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state echo "delete cilium connectivity test resources and re-validate state" kubectl delete ns cilium-test kubectl get pod -owide -A - bash hack/scripts/validate_state.sh + make test-validate-state name: "validatePods" displayName: "Validate Pods" diff --git a/Makefile b/Makefile index e8d7b9dfa07..3933d0c2065 100644 --- a/Makefile +++ b/Makefile @@ -709,6 +709,8 @@ workspace: ## Set up the Go workspace. ##@ Test COVER_PKG ?= . +#Restart case is used for cni load test pipeline for restarting the nodes cluster. +RESTART_CASE ?= false # COVER_FILTER omits folders with all files tagged with one of 'unit', '!ignore_uncovered', or '!ignore_autogenerated' test-all: ## run all unit tests. @@ -721,6 +723,10 @@ test-integration: ## run all integration tests. CNS_VERSION=$(CNS_VERSION) \ go test -mod=readonly -buildvcs=false -timeout 1h -coverpkg=./... -race -covermode atomic -coverprofile=coverage.out -tags=integration ./test/integration... +test-validate-state: + cd test/integration/load && go test -count 1 -timeout 30m -tags load -run ^TestValidateState -tags=load -restart-case=$(RESTART_CASE) + cd ../../.. + test-cyclonus: ## run the cyclonus test for npm. cd test/cyclonus && bash ./test-cyclonus.sh cd .. diff --git a/hack/scripts/scale_deployment.sh b/hack/scripts/scale_deployment.sh deleted file mode 100644 index ceb715057de..00000000000 --- a/hack/scripts/scale_deployment.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -set -ex -total_num_of_run=5 -scale_up_of_pods=2400 -scale_down_pods=1 - -function help() -{ - echo "Scale deployment based on the parameters." - echo "By default script will repeat the process of scale up/down" - echo - echo "Syntax: scale [-h|n|u|s|c|r]" - echo "options:" - echo "h Print this help." - echo "n Number of times the scale down/scale up task should run." - echo "u Number of pods to be scaled up." - echo "s Scale the pods single time. Accepted Values: true, default : false" - echo "c Check deployment status. Accepted Values: true, default : false" - echo -} - -function check_deployment() { - available=-1 - replicas="$1" - while [ "${available}" -ne "${replicas}" ]; do - sleep 5s - current_available=$(kubectl get deployment container -o "jsonpath={.status.availableReplicas}" ) - if [ "$current_available" != '' ]; then - available=$current_available - fi - echo "available replicas: ${available}" - done - echo "deployment complete." 
-} - -function scale_deployment() -{ - desired_replicas=$1 - kubectl scale deployment container --replicas "$desired_replicas" - echo "Scaled the deployment to $desired_replicas" -} - -function repeat_deployment() { - echo "Total num of run $total_num_of_run" - for ((i=1; i <= total_num_of_run; i++)) - do - echo "Current Run: $i" - echo "Scaling down pods to : $scale_down_pods" - scale_deployment $scale_down_pods - check_deployment $scale_down_pods - echo "Scaling pods to : $scale_up_of_pods" - scale_deployment "$scale_up_of_pods" - check_deployment "$scale_up_of_pods" - done -} - -while getopts ":h:n:u:sc" option; do - case $option in - h) help - exit;; - n) total_num_of_run=$OPTARG;; - u) scale_up_of_pods=$OPTARG;; - s) echo "Scale deployment" - scale_deployment "$scale_up_of_pods";; - c) echo "Check deployment" - check_deployment "$scale_up_of_pods";; - \?) echo "Error: Invalid option" - exit;; - esac -done - -if [ "$total_num_of_run" -gt 0 ]; then - repeat_deployment -fi diff --git a/hack/scripts/validate_state.sh b/hack/scripts/validate_state.sh deleted file mode 100755 index 5c8cad03aca..00000000000 --- a/hack/scripts/validate_state.sh +++ /dev/null @@ -1,161 +0,0 @@ -#!/bin/bash -restart=false -skipNode=false - -function find_in_array() { - for i in $1 - do - if [ "$i" == "$2" ] ; then - return 0 - fi - done - return 1 -} - -for node in $(kubectl get nodes -o name); -do - while getopts ":r" opt; do - case $opt in - r ) restart=true - echo "getting restart flag";; - esac - done - - echo "Current : $node" - node_name="${node##*/}" - node_ip=$(kubectl get "$node" -o jsonpath='{$.status.addresses[?(@.type=="InternalIP")].address}') - echo "Node internal ip: $node_ip" - - privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}') - echo "privileged pod : $privileged_pod" - if [ "$privileged_pod" == '' ]; then - kubectl describe daemonset privileged-daemonset -n kube-system - exit 1 - fi - for i in {1..5}; - do - if ! [ -s "azure_endpoints.json" ]; then - echo "trying to get the azure_endpoints" - kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "cat /var/run/azure-cns/azure-endpoints.json" > azure_endpoints.json - sleep 10 - if [ "$i" == "5" ]; then - if [ $restart == true ]; then - echo "node was restarted, no statefile exists" - skipNode=true - else - printf "Error: Failed to get azure_endpoints.json" - exit 1 - fi - fi - fi - done - - if [ $skipNode == true ]; then - echo $skipNode - echo "node was restarted, skip validate state for this node" - # reset skipNode to false - skipNode=false - continue - fi - - cilium_agent=$(kubectl get pod -l k8s-app=cilium -n kube-system -o wide | grep "$node_name" | awk '{print $1}') - echo "cilium agent : $cilium_agent" - - while ! [ -s "cilium_endpoints.json" ] - do - echo "trying to get the cilium_endpoints" - kubectl exec -i "$cilium_agent" -n kube-system -- bash -c "cilium endpoint list -o json" > cilium_endpoints.json - sleep 10 - done - - cns_pod=$(kubectl get pod -l k8s-app=azure-cns -n kube-system -o wide | grep "$node_name" | awk '{print $1}') - echo "azure-cns pod : $cns_pod" - - while ! 
[ -s "cns_endpoints.json" ] - do - echo "trying to get the cns_endpoints" - kubectl exec -it "$cns_pod" -n kube-system -- curl localhost:10090/debug/ipaddresses -d '{"IPConfigStateFilter":["Assigned"]}' > cns_endpoints.json - sleep 10 - done - - total_pods=$(kubectl get pods --all-namespaces -o wide --field-selector spec.nodeName="$node_name",status.phase=Running --output json) - - echo "Checking if there are any pods with no ips" - pods_with_no_ip=$(echo "$total_pods" | jq -j '(.items[] | select(.status.podIP == "" or .status.podIP == null))') - if [ "$pods_with_no_ip" != "" ]; then - echo "There are some pods with no ip assigned." - kubectl get pods -A -o wide - exit 1 - fi - - total_pods_ips=$(echo "$total_pods" | jq -r '(.items[] | select(.status.podIP != "" and .status.podIP != null)) | .status.podIP') - pod_ips=() - num_of_pod_ips=0 - for ip in $total_pods_ips - do - if [ "$ip" != "$node_ip" ]; then - pod_ips+=("$ip") - num_of_pod_ips=$((num_of_pod_ips+1)) - fi - done - echo "Number of pods running with ip assigned $num_of_pod_ips" - - num_of_azure_endpoint_ips=$( cat azure_endpoints.json | jq -r '[.Endpoints | .[] | .IfnameToIPMap.eth0.IPv4[0].IP] | length' ) - azure_endpoint_ips=$( cat azure_endpoints.json | jq -r '(.Endpoints | .[] | .IfnameToIPMap.eth0.IPv4[0].IP) ' ) - echo "Number of azure endpoint ips : $num_of_azure_endpoint_ips" - - if [ "$num_of_pod_ips" != "$num_of_azure_endpoint_ips" ]; then - printf "Error: Number of pods in running state is less than total ips in the azure endpoint file" >&2 - exit 1 - fi - - echo "checking the ips in the azure endpoints file" - for ip in "${pod_ips[@]}" - do - find_in_array "$azure_endpoint_ips" "$ip" "azure_endpoints.json" - if [[ $? -eq 1 ]]; then - printf "Error: %s Not found in the azure_endpoints.json" "$ip" >&2 - exit 1 - fi - done - - num_of_cilium_endpoints=$(cat cilium_endpoints.json | jq -r '[.[] | select(.status.networking.addressing[0].ipv4 != null)] | length') - cilium_endpoint_ips=$(cat cilium_endpoints.json | jq -r '(.[] | select(.status.networking.addressing[0].ipv4 != null) | .status.networking.addressing[0].ipv4)') - echo "Number of cilium endpoints: $num_of_cilium_endpoints" - - if [ "$num_of_pod_ips" != "$num_of_cilium_endpoints" ]; then - printf "Error: Number of pods in running state is less than total ips in the cilium endpoint file" >&2 - exit 1 - fi - - for ip in "${pod_ips[@]}" - do - find_in_array "$cilium_endpoint_ips" "$ip" "cilium_endpoints.json" - if [[ $? -eq 1 ]]; then - printf "Error: %s Not found in the cilium_endpoints.json" "$ip" >&2 - exit 1 - fi - done - - num_of_cns_endpoints=$(cat cns_endpoints.json | jq -r '[.IPConfigurationStatus | .[] | select(.IPAddress != null)] | length') - cns_endpoint_ips=$(cat cns_endpoints.json | jq -r '(.IPConfigurationStatus | .[] | select(.IPAddress != null) | .IPAddress)') - echo "Number of cns endpoints: $num_of_cns_endpoints" - - if [ "$num_of_pod_ips" != "$num_of_cns_endpoints" ]; then - printf "Error: Number of pods in running state is less than total ips in the cns endpoint file" >&2 - exit 1 - fi - - for ip in "${pod_ips[@]}" - do - find_in_array "$cns_endpoint_ips" "$ip" "cns_endpoints.json" - if [[ $? -eq 1 ]]; then - printf "Error: %s Not found in the cns_endpoints.json" "$ip" >&2 - exit 1 - fi - done - - #We are restarting the systmemd network and checking that the connectivity works after the restart. 
For more details: https://github.com/cilium/cilium/issues/18706 - kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'" - rm -rf cilium_endpoints.json azure_endpoints.json cns_endpoints.json -done diff --git a/test/integration/k8s_test.go b/test/integration/k8s_test.go index 448f2329914..e97995197eb 100644 --- a/test/integration/k8s_test.go +++ b/test/integration/k8s_test.go @@ -9,17 +9,16 @@ import ( "flag" "fmt" "os" - "path/filepath" "testing" "time" "github.com/Azure/azure-container-networking/test/integration/goldpinger" - "github.com/Azure/azure-container-networking/test/integration/retry" + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/Azure/azure-container-networking/test/internal/retry" v1 "k8s.io/client-go/kubernetes/typed/apps/v1" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/homedir" ) const ( @@ -38,7 +37,6 @@ const ( var ( defaultRetrier = retry.Retrier{Attempts: retryAttempts, Delay: retryDelaySec} - kubeconfig = flag.String("test-kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "(optional) absolute path to the kubeconfig file") delegatedSubnetID = flag.String("delegated-subnet-id", "", "delegated subnet id for node labeling") delegatedSubnetName = flag.String("subnet-name", "", "subnet name for node labeling") ) @@ -83,18 +81,18 @@ todo: */ func TestPodScaling(t *testing.T) { - clientset, err := mustGetClientset() + clientset, err := k8sutils.MustGetClientset() if err != nil { t.Fatal(err) } - restConfig := mustGetRestConfig(t) - deployment, err := mustParseDeployment(gpDeployment) + restConfig := k8sutils.MustGetRestConfig(t) + deployment, err := k8sutils.MustParseDeployment(gpDeployment) if err != nil { t.Fatal(err) } - daemonset, err := mustParseDaemonSet(gpDaemonset) + daemonset, err := k8sutils.MustParseDaemonSet(gpDaemonset) if err != nil { t.Fatal(err) } @@ -102,25 +100,25 @@ func TestPodScaling(t *testing.T) { ctx := context.Background() if shouldLabelNodes() { - mustLabelSwiftNodes(t, ctx, clientset, *delegatedSubnetID, *delegatedSubnetName) + k8sutils.MustLabelSwiftNodes(ctx, t, clientset, *delegatedSubnetID, *delegatedSubnetName) } else { t.Log("swift node labels not passed or set. 
skipping labeling") } - rbacCleanUpFn, err := mustSetUpClusterRBAC(ctx, clientset, gpClusterRolePath, gpClusterRoleBindingPath, gpServiceAccountPath) + rbacCleanUpFn, err := k8sutils.MustSetUpClusterRBAC(ctx, clientset, gpClusterRolePath, gpClusterRoleBindingPath, gpServiceAccountPath) if err != nil { t.Log(os.Getwd()) t.Fatal(err) } deploymentsClient := clientset.AppsV1().Deployments(deployment.Namespace) - err = mustCreateDeployment(ctx, deploymentsClient, deployment) + err = k8sutils.MustCreateDeployment(ctx, deploymentsClient, deployment) if err != nil { t.Fatal(err) } daemonsetClient := clientset.AppsV1().DaemonSets(daemonset.Namespace) - err = mustCreateDaemonset(ctx, daemonsetClient, daemonset) + err = k8sutils.MustCreateDaemonset(ctx, daemonsetClient, daemonset) if err != nil { t.Fatal(err) } @@ -256,7 +254,7 @@ func updateReplicaCount(t *testing.T, ctx context.Context, deployments v1.Deploy } t.Logf("setting deployment %s to %d replicas", name, replicas) - res.Spec.Replicas = int32ptr(int32(replicas)) + res.Spec.Replicas = k8sutils.Int32ToPtr(int32(replicas)) _, err = deployments.Update(ctx, res, metav1.UpdateOptions{}) return err }) diff --git a/test/integration/load/load_test.go b/test/integration/load/load_test.go new file mode 100644 index 00000000000..df656db26d1 --- /dev/null +++ b/test/integration/load/load_test.go @@ -0,0 +1,161 @@ +//go:build load + +package load + +import ( + "context" + "flag" + "testing" + "time" + + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/Azure/azure-container-networking/test/validate" +) + +const ( + manifestDir = "../manifests" + noopdeployment = manifestDir + "/load/noop-deployment.yaml" + podLabelSelector = "load-test=true" +) + +var ( + osType = flag.String("os", "linux", "Operating system to run the test on") + cniType = flag.String("cni", "cilium", "CNI to run the test on") + iterations = flag.Int("iterations", 2, "Number of iterations to run the test for") + scaleUpReplicas = flag.Int("scaleup", 10, "Number of replicas to scale up to") + scaleDownReplicas = flag.Int("scaledown", 1, "Number of replicas to scale down to") + replicas = flag.Int("replicas", 1, "Number of replicas to scale up/down to") + validateStateFile = flag.Bool("validate-statefile", false, "Validate the state file") + skipWait = flag.Bool("skip-wait", false, "Skip waiting for pods to be ready") + restartCase = flag.Bool("restart-case", false, "In restart case, skip if we don't find state file") + namespace = "load-test" +) + +/* +In order to run the scale tests, you need a k8s cluster and its kubeconfig. +If no kubeconfig is passed, the test will attempt to find one in the default location for kubectl config. +Run the tests as follows: + +go test -timeout 30m -tags load -run ^TestLoad$ -tags=load + +The Load test scale the pods up/down on the cluster and validates the pods have IP. By default it runs the +cycle for 2 iterations. + +To validate the state file, set the flag -validate-statefile to true. By default it is set to false. +todo: consider adding the following scenarios +- [x] All pods should be assigned an IP. +- [x] Test the CNS state file. +- [x] Test the CNS Local cache. +- [x] Test the Cilium state file. +- [x] Test the Node restart. +- [ ] Test based on operating system. +- [ ] Test the HNS state file. +- [ ] Parameterize the os, cni and number of iterations. +- [ ] Add deployment yaml for windows. 
+*/ +func TestLoad(t *testing.T) { + clientset, err := k8sutils.MustGetClientset() + if err != nil { + t.Fatal(err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + err = k8sutils.MustCreateNamespace(ctx, clientset, namespace) + if err != nil { + t.Fatal(err) + } + + deployment, err := k8sutils.MustParseDeployment(noopdeployment) + if err != nil { + t.Fatal(err) + } + + deploymentsClient := clientset.AppsV1().Deployments(namespace) + err = k8sutils.MustCreateDeployment(ctx, deploymentsClient, deployment) + if err != nil { + t.Fatal(err) + } + + t.Log("Checking pods are running") + err = k8sutils.WaitForPodsRunning(ctx, clientset, namespace, podLabelSelector) + if err != nil { + t.Fatal(err) + } + + t.Log("Repeating the scale up/down cycle") + for i := 0; i < *iterations; i++ { + t.Log("Iteration ", i) + t.Log("Scale down deployment") + err = k8sutils.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *scaleDownReplicas, *skipWait) + if err != nil { + t.Fatal(err) + } + t.Log("Scale up deployment") + err = k8sutils.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *scaleUpReplicas, *skipWait) + if err != nil { + t.Fatal(err) + } + } + t.Log("Checking pods are running and IP assigned") + err = k8sutils.WaitForPodsRunning(ctx, clientset, "", "") + if err != nil { + t.Fatal(err) + } + + if *validateStateFile { + t.Run("Validate state file", TestValidateState) + } +} + +// TestValidateState validates the state file based on the os and cni type. +func TestValidateState(t *testing.T) { + clientset, err := k8sutils.MustGetClientset() + if err != nil { + t.Fatal(err) + } + config := k8sutils.MustGetRestConfig(t) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + t.Log("Validating the state file") + validatorClient := validate.GetValidatorClient(*osType) + validator := validatorClient.CreateClient(ctx, clientset, config, namespace, *cniType, *restartCase) + + err = validator.ValidateStateFile() + if err != nil { + t.Fatal(err) + } + + //We are restarting the systmemd network and checking that the connectivity works after the restart. For more details: https://github.com/cilium/cilium/issues/18706 + t.Log("Validating the restart network scenario") + err = validator.ValidateRestartNetwork() + if err != nil { + t.Fatal(err) + } +} + +// TestScaleDeployment scales the deployment up/down based on the replicas passed. 
+// go test -timeout 30m -tags load -run ^TestScaleDeployment$ -tags=load -replicas 10 +func TestScaleDeployment(t *testing.T) { + t.Log("Scale deployment") + clientset, err := k8sutils.MustGetClientset() + if err != nil { + t.Fatal(err) + } + ctx := context.Background() + err = k8sutils.MustCreateNamespace(ctx, clientset, namespace) + if err != nil { + t.Fatal(err) + } + deployment, err := k8sutils.MustParseDeployment(noopdeployment) + if err != nil { + t.Fatal(err) + } + deploymentsClient := clientset.AppsV1().Deployments(namespace) + err = k8sutils.MustScaleDeployment(ctx, deploymentsClient, deployment, clientset, namespace, podLabelSelector, *replicas, *skipWait) + if err != nil { + t.Fatal(err) + } +} diff --git a/hack/manifests/pod.yaml b/test/integration/manifests/load/noop-deployment.yaml similarity index 64% rename from hack/manifests/pod.yaml rename to test/integration/manifests/load/noop-deployment.yaml index aaed78ae1de..85272941c68 100644 --- a/hack/manifests/pod.yaml +++ b/test/integration/manifests/load/noop-deployment.yaml @@ -1,19 +1,20 @@ +# No-op for linux based cluster apiVersion: apps/v1 kind: Deployment metadata: - name: container - namespace: default + name: load-test + namespace: load-test spec: selector: matchLabels: - app: container + load-test: "true" template: metadata: labels: - app: container + load-test: "true" spec: containers: - - name: ubuntu + - name: no-op image: mcr.microsoft.com/oss/kubernetes/pause:3.6 imagePullPolicy: Always securityContext: diff --git a/hack/manifests/hostprocess.yaml b/test/integration/manifests/load/privileged-daemonset.yaml similarity index 100% rename from hack/manifests/hostprocess.yaml rename to test/integration/manifests/load/privileged-daemonset.yaml diff --git a/test/integration/setup_test.go b/test/integration/setup_test.go index 4900b49e68f..5ece6e64982 100644 --- a/test/integration/setup_test.go +++ b/test/integration/setup_test.go @@ -10,6 +10,7 @@ import ( "strconv" "testing" + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" "k8s.io/client-go/kubernetes" ) @@ -71,7 +72,7 @@ func TestMain(m *testing.M) { os.Exit(exitCode) }() - clientset, err := mustGetClientset() + clientset, err := k8sutils.MustGetClientset() if err != nil { return } @@ -98,26 +99,26 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cnsVersion := os.Getenv(envCNSVersion) // setup daemonset - cns, err := mustParseDaemonSet(cnsDaemonSetPath) + cns, err := k8sutils.MustParseDaemonSet(cnsDaemonSetPath) if err != nil { return nil, err } - image, _ := parseImageString(cns.Spec.Template.Spec.Containers[0].Image) - cns.Spec.Template.Spec.Containers[0].Image = getImageString(image, cnsVersion) + image, _ := k8sutils.ParseImageString(cns.Spec.Template.Spec.Containers[0].Image) + cns.Spec.Template.Spec.Containers[0].Image = k8sutils.GetImageString(image, cnsVersion) // check environment scenario log.Printf("Checking environment scenario") if installBoolDropgz := os.Getenv(envTestDropgz); installBoolDropgz != "" { if testDropgzScenario, err := strconv.ParseBool(installBoolDropgz); err == nil && testDropgzScenario == true { log.Printf("Env %v set to true, deploy cniTest.Dockerfile", envTestDropgz) - initImage, _ := parseImageString("acnpublic.azurecr.io/cni-dropgz-test:latest") - cns.Spec.Template.Spec.InitContainers[0].Image = getImageString(initImage, cniDropgzVersion) + initImage, _ := k8sutils.ParseImageString("acnpublic.azurecr.io/cni-dropgz-test:latest") + 
cns.Spec.Template.Spec.InitContainers[0].Image = k8sutils.GetImageString(initImage, cniDropgzVersion) } } else { log.Printf("Env %v not set to true, deploying cni.Dockerfile", envTestDropgz) - initImage, _ := parseImageString(cns.Spec.Template.Spec.InitContainers[0].Image) - cns.Spec.Template.Spec.InitContainers[0].Image = getImageString(initImage, cniDropgzVersion) + initImage, _ := k8sutils.ParseImageString(cns.Spec.Template.Spec.InitContainers[0].Image) + cns.Spec.Template.Spec.InitContainers[0].Image = k8sutils.GetImageString(initImage, cniDropgzVersion) } if installBool1 := os.Getenv(envInstallAzureVnet); installBool1 != "" { @@ -126,7 +127,7 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cns.Spec.Template.Spec.InitContainers[0].Args = []string{"deploy", "azure-vnet", "-o", "/opt/cni/bin/azure-vnet", "azure-vnet-telemetry", "-o", "/opt/cni/bin/azure-vnet-telemetry", "azure-vnet-ipam", "-o", "/opt/cni/bin/azure-vnet-ipam", "azure-swift.conflist", "-o", "/etc/cni/net.d/10-azure.conflist"} } // setup the CNS swiftconfigmap - if err := mustSetupConfigMap(ctx, clientset, cnsSwiftConfigMapPath); err != nil { + if err := k8sutils.MustSetupConfigMap(ctx, clientset, cnsSwiftConfigMapPath); err != nil { return nil, err } } else { @@ -139,7 +140,7 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cns.Spec.Template.Spec.InitContainers[0].Args = []string{"deploy", "azure-ipam", "-o", "/opt/cni/bin/azure-ipam"} } // setup the CNS ciliumconfigmap - if err := mustSetupConfigMap(ctx, clientset, cnsCiliumConfigMapPath); err != nil { + if err := k8sutils.MustSetupConfigMap(ctx, clientset, cnsCiliumConfigMapPath); err != nil { return nil, err } } else { @@ -152,7 +153,7 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l cns.Spec.Template.Spec.InitContainers[0].Args = []string{"deploy", "azure-ipam", "-o", "/opt/cni/bin/azure-ipam"} } // setup the CNS ciliumconfigmap - if err := mustSetupConfigMap(ctx, clientset, cnsOverlayConfigMapPath); err != nil { + if err := k8sutils.MustSetupConfigMap(ctx, clientset, cnsOverlayConfigMapPath); err != nil { return nil, err } } else { @@ -164,25 +165,25 @@ func installCNSDaemonset(ctx context.Context, clientset *kubernetes.Clientset, l log.Printf("Installing CNS with image %s", cns.Spec.Template.Spec.Containers[0].Image) // setup common RBAC, ClusteerRole, ClusterRoleBinding, ServiceAccount - if _, err := mustSetUpClusterRBAC(ctx, clientset, cnsClusterRolePath, cnsClusterRoleBindingPath, cnsServiceAccountPath); err != nil { + if _, err := k8sutils.MustSetUpClusterRBAC(ctx, clientset, cnsClusterRolePath, cnsClusterRoleBindingPath, cnsServiceAccountPath); err != nil { return nil, err } // setup RBAC, Role, RoleBinding - if err := mustSetUpRBAC(ctx, clientset, cnsRolePath, cnsRoleBindingPath); err != nil { + if err := k8sutils.MustSetUpRBAC(ctx, clientset, cnsRolePath, cnsRoleBindingPath); err != nil { return nil, err } - if err = mustCreateDaemonset(ctx, cnsDaemonsetClient, cns); err != nil { + if err = k8sutils.MustCreateDaemonset(ctx, cnsDaemonsetClient, cns); err != nil { return nil, err } - if err = waitForPodsRunning(ctx, clientset, cns.Namespace, cnsLabelSelector); err != nil { + if err = k8sutils.WaitForPodsRunning(ctx, clientset, cns.Namespace, cnsLabelSelector); err != nil { return nil, err } cleanupds := func() error { - if err := exportLogsByLabelSelector(ctx, clientset, cns.Namespace, cnsLabelSelector, logDir); err != nil { + if err := 
k8sutils.ExportLogsByLabelSelector(ctx, clientset, cns.Namespace, cnsLabelSelector, logDir); err != nil { return err } return nil diff --git a/test/integration/label.go b/test/internal/k8sutils/label.go similarity index 97% rename from test/integration/label.go rename to test/internal/k8sutils/label.go index a8f439f0c2c..51079c43a02 100644 --- a/test/integration/label.go +++ b/test/internal/k8sutils/label.go @@ -1,4 +1,4 @@ -package k8s +package k8sutils import ( "context" diff --git a/test/integration/utils_test.go b/test/internal/k8sutils/utils.go similarity index 58% rename from test/integration/utils_test.go rename to test/internal/k8sutils/utils.go index 857180850fb..862b3d20d2f 100644 --- a/test/integration/utils_test.go +++ b/test/internal/k8sutils/utils.go @@ -1,38 +1,47 @@ -//go:build integration - -package k8s +package k8sutils import ( "bytes" "context" - "errors" + "flag" "io" "log" + "os" + "path/filepath" "strings" + "testing" "time" // crd "dnc/requestcontroller/kubernetes" - "os" - "testing" - "github.com/Azure/azure-container-networking/test/integration/retry" - apiv1 "k8s.io/api/core/v1" + "github.com/Azure/azure-container-networking/test/internal/retry" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/yaml" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/remotecommand" + "k8s.io/client-go/util/homedir" ) const ( DelegatedSubnetIDLabel = "kubernetes.azure.com/podnetwork-delegationguid" SubnetNameLabel = "kubernetes.azure.com/podnetwork-subnet" + + // RetryAttempts is the number of times to retry a test. 
+ RetryAttempts = 30 + RetryDelay = 30 * time.Second ) -func mustGetClientset() (*kubernetes.Clientset, error) { - config, err := clientcmd.BuildConfigFromFlags("", *kubeconfig) +var Kubeconfig = flag.String("test-kubeconfig", filepath.Join(homedir.HomeDir(), ".kube", "config"), "(optional) absolute path to the kubeconfig file") + +func MustGetClientset() (*kubernetes.Clientset, error) { + config, err := clientcmd.BuildConfigFromFlags("", *Kubeconfig) if err != nil { return nil, err } @@ -43,8 +52,8 @@ func mustGetClientset() (*kubernetes.Clientset, error) { return clientset, nil } -func mustGetRestConfig(t *testing.T) *rest.Config { - config, err := clientcmd.BuildConfigFromFlags("", *kubeconfig) +func MustGetRestConfig(t *testing.T) *rest.Config { + config, err := clientcmd.BuildConfigFromFlags("", *Kubeconfig) if err != nil { t.Fatal(err) } @@ -65,7 +74,7 @@ func mustParseResource(path string, out interface{}) error { return err } -func mustLabelSwiftNodes(t *testing.T, ctx context.Context, clientset *kubernetes.Clientset, delegatedSubnetID, delegatedSubnetName string) { +func MustLabelSwiftNodes(ctx context.Context, t *testing.T, clientset *kubernetes.Clientset, delegatedSubnetID, delegatedSubnetName string) { swiftNodeLabels := map[string]string{ DelegatedSubnetIDLabel: delegatedSubnetID, SubnetNameLabel: delegatedSubnetName, @@ -84,7 +93,7 @@ func mustLabelSwiftNodes(t *testing.T, ctx context.Context, clientset *kubernete } } -func mustSetUpClusterRBAC(ctx context.Context, clientset *kubernetes.Clientset, clusterRolePath, clusterRoleBindingPath, serviceAccountPath string) (func(), error) { +func MustSetUpClusterRBAC(ctx context.Context, clientset *kubernetes.Clientset, clusterRolePath, clusterRoleBindingPath, serviceAccountPath string) (func(), error) { var ( err error clusterRole v1.ClusterRole @@ -139,7 +148,7 @@ func mustSetUpClusterRBAC(ctx context.Context, clientset *kubernetes.Clientset, return cleanupFunc, nil } -func mustSetUpRBAC(ctx context.Context, clientset *kubernetes.Clientset, rolePath, roleBindingPath string) error { +func MustSetUpRBAC(ctx context.Context, clientset *kubernetes.Clientset, rolePath, roleBindingPath string) error { var ( err error role v1.Role @@ -168,7 +177,7 @@ func mustSetUpRBAC(ctx context.Context, clientset *kubernetes.Clientset, rolePat return nil } -func mustSetupConfigMap(ctx context.Context, clientset *kubernetes.Clientset, configMapPath string) error { +func MustSetupConfigMap(ctx context.Context, clientset *kubernetes.Clientset, configMapPath string) error { var ( err error cm corev1.ConfigMap @@ -183,24 +192,24 @@ func mustSetupConfigMap(ctx context.Context, clientset *kubernetes.Clientset, co return mustCreateConfigMap(ctx, configmaps, cm) } -func int32ptr(i int32) *int32 { return &i } +func Int32ToPtr(i int32) *int32 { return &i } -func parseImageString(s string) (image, version string) { +func ParseImageString(s string) (image, version string) { sl := strings.Split(s, ":") return sl[0], sl[1] } -func getImageString(image, version string) string { +func GetImageString(image, version string) string { return image + ":" + version } -func waitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector string) error { +func WaitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector string) error { podsClient := clientset.CoreV1().Pods(namespace) checkPodIPsFn := func() error { podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: labelselector}) if err 
!= nil { - return err + return errors.Wrapf(err, "could not list pods with label selector %s", labelselector) } if len(podList.Items) == 0 { @@ -208,7 +217,7 @@ func waitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, na } for _, pod := range podList.Items { - if pod.Status.Phase == apiv1.PodPending { + if pod.Status.Phase == corev1.PodPending { return errors.New("some pods still pending") } } @@ -222,11 +231,51 @@ func waitForPodsRunning(ctx context.Context, clientset *kubernetes.Clientset, na return nil } - retrier := retry.Retrier{Attempts: 10, Delay: 6 * time.Second} + retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay} return retrier.Do(ctx, checkPodIPsFn) } -func exportLogsByLabelSelector(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, logDir string) error { +func WaitForPodDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, deploymentName, podLabelSelector string, replicas int) error { + podsClient := clientset.CoreV1().Pods(namespace) + deploymentsClient := clientset.AppsV1().Deployments(namespace) + checkPodDeploymentFn := func() error { + deployment, err := deploymentsClient.Get(ctx, deploymentName, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "could not get deployment %s", deploymentName) + } + + if deployment.Status.AvailableReplicas != int32(replicas) { + return errors.New("deployment does not have the expected number of available replicas") + } + + podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: podLabelSelector}) + if err != nil { + return errors.Wrapf(err, "could not list pods with label selector %s", podLabelSelector) + } + + log.Printf("deployment %s has %d pods, expected %d", deploymentName, len(podList.Items), replicas) + if len(podList.Items) != replicas { + return errors.New("some pods of the deployment are still not ready") + } + return nil + } + + retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay} + return errors.Wrapf(retrier.Do(ctx, checkPodDeploymentFn), "could not wait for deployment %s", deploymentName) +} + +func MustUpdateReplica(ctx context.Context, deploymentsClient typedappsv1.DeploymentInterface, deploymentName string, replicas int32) error { + deployment, err := deploymentsClient.Get(ctx, deploymentName, metav1.GetOptions{}) + if err != nil { + return errors.Wrapf(err, "could not get deployment %s", deploymentName) + } + + deployment.Spec.Replicas = Int32ToPtr(replicas) + _, err = deploymentsClient.Update(ctx, deployment, metav1.UpdateOptions{}) + return errors.Wrapf(err, "could not update deployment %s", deploymentName) +} + +func ExportLogsByLabelSelector(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, logDir string) error { podsClient := clientset.CoreV1().Pods(namespace) podLogOpts := corev1.PodLogOptions{} logExtension := ".log" @@ -278,3 +327,36 @@ func writeToFile(dir, fileName, str string) error { _, err = f.WriteString(str) return err } + +func ExecCmdOnPod(ctx context.Context, clientset *kubernetes.Clientset, namespace, podName string, cmd []string, config *rest.Config) ([]byte, error) { + req := clientset.CoreV1().RESTClient().Post(). + Resource("pods"). + Name(podName). + Namespace(namespace). + SubResource("exec"). 
+ VersionedParams(&corev1.PodExecOptions{ + Command: cmd, + Stdin: false, + Stdout: true, + Stderr: true, + TTY: true, + }, scheme.ParameterCodec) + + exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL()) + if err != nil { + return []byte{}, errors.Wrapf(err, "error in creating executor for req %s", req.URL()) + } + + var stdout, stderr bytes.Buffer + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + Tty: true, + }) + if err != nil { + return []byte{}, errors.Wrapf(err, "error in executing command %s", cmd) + } + + return stdout.Bytes(), nil +} diff --git a/test/integration/utils_create_test.go b/test/internal/k8sutils/utils_create.go similarity index 73% rename from test/integration/utils_create_test.go rename to test/internal/k8sutils/utils_create.go index 6a24bc08faa..349264cfa1b 100644 --- a/test/integration/utils_create_test.go +++ b/test/internal/k8sutils/utils_create.go @@ -1,24 +1,22 @@ -//go:build integration - -package k8s +package k8sutils import ( "context" "log" - // crd "dnc/requestcontroller/kubernetes" - + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" typedrbacv1 "k8s.io/client-go/kubernetes/typed/rbac/v1" ) -func mustCreateDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetInterface, ds appsv1.DaemonSet) error { +func MustCreateDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetInterface, ds appsv1.DaemonSet) error { if err := mustDeleteDaemonset(ctx, daemonsets, ds); err != nil { return err } @@ -30,7 +28,7 @@ func mustCreateDaemonset(ctx context.Context, daemonsets typedappsv1.DaemonSetIn return nil } -func mustCreateDeployment(ctx context.Context, deployments typedappsv1.DeploymentInterface, d appsv1.Deployment) error { +func MustCreateDeployment(ctx context.Context, deployments typedappsv1.DeploymentInterface, d appsv1.Deployment) error { if err := mustDeleteDeployment(ctx, deployments, d); err != nil { return err } @@ -125,3 +123,41 @@ func mustCreateConfigMap(ctx context.Context, cmi typedcorev1.ConfigMapInterface return nil } + +func MustScaleDeployment(ctx context.Context, + deploymentsClient typedappsv1.DeploymentInterface, + deployment appsv1.Deployment, + clientset *kubernetes.Clientset, + namespace, + podLabelSelector string, + replicas int, + skipWait bool, +) error { + log.Printf("Scaling deployment %v to %v replicas", deployment.Name, replicas) + err := MustUpdateReplica(ctx, deploymentsClient, deployment.Name, int32(replicas)) + if err != nil { + return err + } + + if !skipWait { + log.Printf("Waiting for pods to be ready..") + err = WaitForPodDeployment(ctx, clientset, namespace, deployment.Name, podLabelSelector, replicas) + if err != nil { + return err + } + } + return nil +} + +func MustCreateNamespace(ctx context.Context, clienset *kubernetes.Clientset, namespace string) error { + _, err := clienset.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + }, metav1.CreateOptions{}) + + if !apierrors.IsAlreadyExists(err) { + return errors.Wrapf(err, "failed to create namespace %v", namespace) + } + return nil +} diff --git a/test/integration/utils_delete_test.go 
b/test/internal/k8sutils/utils_delete.go similarity index 64% rename from test/integration/utils_delete_test.go rename to test/internal/k8sutils/utils_delete.go index e012a5240f6..68d72b40974 100644 --- a/test/integration/utils_delete_test.go +++ b/test/internal/k8sutils/utils_delete.go @@ -1,13 +1,13 @@ -//go:build integration - -package k8s +package k8sutils import ( "context" + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1" ) @@ -30,3 +30,12 @@ func mustDeleteDeployment(ctx context.Context, deployments typedappsv1.Deploymen return nil } + +func MustDeleteNamespace(ctx context.Context, clienset *kubernetes.Clientset, namespace string) error { + if err := clienset.CoreV1().Namespaces().Delete(ctx, namespace, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "failed to delete namespace %v", namespace) + } + } + return nil +} diff --git a/test/internal/k8sutils/utils_get.go b/test/internal/k8sutils/utils_get.go new file mode 100644 index 00000000000..936faa4a4c8 --- /dev/null +++ b/test/internal/k8sutils/utils_get.go @@ -0,0 +1,42 @@ +package k8sutils + +import ( + "context" + + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +func GetNodeList(ctx context.Context, clientset *kubernetes.Clientset) (*corev1.NodeList, error) { + nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, errors.Wrap(err, "failed to get nodes") + } + + return nodes, nil +} + +func GetPodsByNode(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, nodeName string) (*corev1.PodList, error) { + pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + FieldSelector: "spec.nodeName=" + nodeName, + LabelSelector: labelselector, + }) + if err != nil { + return nil, errors.Wrapf(err, "failed to get pods by node %s", nodeName) + } + return pods, nil +} + +func GetPodsIpsByNode(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelselector, nodeName string) ([]string, error) { + pods, err := GetPodsByNode(ctx, clientset, namespace, labelselector, nodeName) + if err != nil { + return nil, err + } + ips := make([]string, 0, len(pods.Items)) + for index := range pods.Items { + ips = append(ips, pods.Items[index].Status.PodIP) + } + return ips, nil +} diff --git a/test/integration/utils_parse_test.go b/test/internal/k8sutils/utils_parse.go similarity index 87% rename from test/integration/utils_parse_test.go rename to test/internal/k8sutils/utils_parse.go index 787121176ac..53146a0c6d4 100644 --- a/test/integration/utils_parse_test.go +++ b/test/internal/k8sutils/utils_parse.go @@ -1,6 +1,4 @@ -//go:build integration - -package k8s +package k8sutils import ( appsv1 "k8s.io/api/apps/v1" @@ -8,13 +6,13 @@ import ( rbacv1 "k8s.io/api/rbac/v1" ) -func mustParseDaemonSet(path string) (appsv1.DaemonSet, error) { +func MustParseDaemonSet(path string) (appsv1.DaemonSet, error) { var ds appsv1.DaemonSet err := mustParseResource(path, &ds) return ds, err } -func mustParseDeployment(path string) (appsv1.Deployment, error) { +func MustParseDeployment(path string) (appsv1.Deployment, error) { var depl appsv1.Deployment err := mustParseResource(path, &depl) return depl, err diff --git 
a/test/integration/retry/retry.go b/test/internal/retry/retry.go similarity index 96% rename from test/integration/retry/retry.go rename to test/internal/retry/retry.go index 051793aeea9..7b46f9266e8 100644 --- a/test/integration/retry/retry.go +++ b/test/internal/retry/retry.go @@ -1,5 +1,3 @@ -// +build integration - // todo: there are more robust retry packages out there, discuss with team package retry diff --git a/test/validate/client.go b/test/validate/client.go new file mode 100644 index 00000000000..bc6e0aeb549 --- /dev/null +++ b/test/validate/client.go @@ -0,0 +1,38 @@ +package validate + +import ( + "context" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type Validator struct { + ctx context.Context + clientset *kubernetes.Clientset + config *rest.Config + namespace string + cni string + restartCase bool +} + +// Todo: Add the validation for the data path function for the linux/windows client. +type IValidator interface { + ValidateStateFile() error + ValidateRestartNetwork() error + // ValidateDataPath() error +} + +type validatorClient interface { + CreateClient(ctx context.Context, clienset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool) IValidator +} + +// Func to get the type of validator client based on the Operating system. +func GetValidatorClient(os string) validatorClient { + switch os { + case "linux": + return &LinuxClient{} + default: + return nil + } +} diff --git a/test/validate/linux_validate.go b/test/validate/linux_validate.go new file mode 100644 index 00000000000..d2839f4098d --- /dev/null +++ b/test/validate/linux_validate.go @@ -0,0 +1,232 @@ +package validate + +import ( + "context" + "encoding/json" + "log" + + "github.com/Azure/azure-container-networking/cns" + restserver "github.com/Azure/azure-container-networking/cns/restserver" + k8sutils "github.com/Azure/azure-container-networking/test/internal/k8sutils" + "github.com/pkg/errors" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + privilegedDaemonSetPath = "../manifests/load/privileged-daemonset.yaml" + privilegedLabelSelector = "app=privileged-daemonset" + privilegedNamespace = "kube-system" + + cnsLabelSelector = "k8s-app=azure-cns" + ciliumLabelSelector = "k8s-app=cilium" +) + +var ( + restartNetworkCmd = []string{"bash", "-c", "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"} + cnsStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-cns/azure-endpoints.json"} + ciliumStateFileCmd = []string{"bash", "-c", "cilium endpoint list -o json"} + cnsLocalCacheCmd = []string{"curl", "localhost:10090/debug/ipaddresses", "-d", "{\"IPConfigStateFilter\":[\"Assigned\"]}"} +) + +type stateFileIpsFunc func([]byte) (map[string]string, error) + +type LinuxClient struct{} + +type LinuxValidator struct { + Validator +} + +type CnsState struct { + Endpoints map[string]restserver.EndpointInfo `json:"Endpoints"` +} + +type CNSLocalCache struct { + IPConfigurationStatus []cns.IPConfigurationStatus `json:"IPConfigurationStatus"` +} + +type CiliumEndpointStatus struct { + Status NetworkingStatus `json:"status"` +} + +type NetworkingStatus struct { + Networking NetworkingAddressing `json:"networking"` +} + +type NetworkingAddressing struct { + Addresses []Address `json:"addressing"` + InterfaceName string `json:"interface-name"` +} + +type Address struct { + Addr string `json:"ipv4"` +} + +func (l *LinuxClient) CreateClient(ctx context.Context, clienset *kubernetes.Clientset, config *rest.Config, namespace, cni string, 
restartCase bool) IValidator { + // deploy privileged pod + privilegedDaemonSet, err := k8sutils.MustParseDaemonSet(privilegedDaemonSetPath) + if err != nil { + panic(err) + } + daemonsetClient := clienset.AppsV1().DaemonSets(privilegedNamespace) + err = k8sutils.MustCreateDaemonset(ctx, daemonsetClient, privilegedDaemonSet) + if err != nil { + panic(err) + } + err = k8sutils.WaitForPodsRunning(ctx, clienset, privilegedNamespace, privilegedLabelSelector) + if err != nil { + panic(err) + } + return &LinuxValidator{ + Validator: Validator{ + ctx: ctx, + clientset: clienset, + config: config, + namespace: namespace, + cni: cni, + restartCase: restartCase, + }, + } +} + +// Todo: Based on cni version validate different state files +func (v *LinuxValidator) ValidateStateFile() error { + checks := []struct { + name string + stateFileIps func([]byte) (map[string]string, error) + podLabelSelector string + podNamespace string + cmd []string + }{ + {"cns", cnsStateFileIps, cnsLabelSelector, privilegedNamespace, cnsStateFileCmd}, + {"cilium", ciliumStateFileIps, ciliumLabelSelector, privilegedNamespace, ciliumStateFileCmd}, + {"cns cache", cnsCacheStateFileIps, cnsLabelSelector, privilegedNamespace, cnsLocalCacheCmd}, + } + + for _, check := range checks { + err := v.validate(check.stateFileIps, check.cmd, check.name, check.podNamespace, check.podLabelSelector) + if err != nil { + return err + } + } + return nil +} + +func (v *LinuxValidator) ValidateRestartNetwork() error { + nodes, err := k8sutils.GetNodeList(v.ctx, v.clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + // get the privileged pod + pod, err := k8sutils.GetPodsByNode(v.ctx, v.clientset, privilegedNamespace, privilegedLabelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + + privelegedPod := pod.Items[0] + // exec into the pod to get the state file + _, err = k8sutils.ExecCmdOnPod(v.ctx, v.clientset, privilegedNamespace, privelegedPod.Name, restartNetworkCmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod") + } + err = k8sutils.WaitForPodsRunning(v.ctx, v.clientset, "", "") + if err != nil { + return errors.Wrapf(err, "failed to wait for pods running") + } + } + return nil +} + +func cnsStateFileIps(result []byte) (map[string]string, error) { + var cnsResult CnsState + err := json.Unmarshal(result, &cnsResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal cns endpoint list") + } + + cnsPodIps := make(map[string]string) + for _, v := range cnsResult.Endpoints { + for ifName, ip := range v.IfnameToIPMap { + if ifName == "eth0" { + ip := ip.IPv4[0].IP.String() + cnsPodIps[ip] = v.PodName + } + } + } + return cnsPodIps, nil +} + +func ciliumStateFileIps(result []byte) (map[string]string, error) { + var ciliumResult []CiliumEndpointStatus + err := json.Unmarshal(result, &ciliumResult) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal cilium endpoint list") + } + + ciliumPodIps := make(map[string]string) + for _, v := range ciliumResult { + for _, addr := range v.Status.Networking.Addresses { + if addr.Addr != "" { + ciliumPodIps[addr.Addr] = v.Status.Networking.InterfaceName + } + } + } + return ciliumPodIps, nil +} + +func cnsCacheStateFileIps(result []byte) (map[string]string, error) { + var cnsLocalCache CNSLocalCache + + err := json.Unmarshal(result, &cnsLocalCache) + if err != nil { + return nil, 
errors.Wrapf(err, "failed to unmarshal cns local cache") + } + + cnsPodIps := make(map[string]string) + for index := range cnsLocalCache.IPConfigurationStatus { + cnsPodIps[cnsLocalCache.IPConfigurationStatus[index].IPAddress] = cnsLocalCache.IPConfigurationStatus[index].PodInfo.Name() + } + return cnsPodIps, nil +} + +func (v *LinuxValidator) validate(stateFileIps stateFileIpsFunc, cmd []string, checkType, namespace, labelSelector string) error { + log.Printf("Validating %s state file", checkType) + nodes, err := k8sutils.GetNodeList(v.ctx, v.clientset) + if err != nil { + return errors.Wrapf(err, "failed to get node list") + } + + for index := range nodes.Items { + // get the privileged pod + pod, err := k8sutils.GetPodsByNode(v.ctx, v.clientset, namespace, labelSelector, nodes.Items[index].Name) + if err != nil { + return errors.Wrapf(err, "failed to get privileged pod") + } + podName := pod.Items[0].Name + // exec into the pod to get the state file + result, err := k8sutils.ExecCmdOnPod(v.ctx, v.clientset, namespace, podName, cmd, v.config) + if err != nil { + return errors.Wrapf(err, "failed to exec into privileged pod") + } + filePodIps, err := stateFileIps(result) + if err != nil { + return errors.Wrapf(err, "failed to get pod ips from state file") + } + if len(filePodIps) == 0 && v.restartCase { + log.Printf("No pods found on node %s", nodes.Items[index].Name) + continue + } + // get the pod ips + podIps := getPodIPsWithoutNodeIP(v.ctx, v.clientset, nodes.Items[index]) + + check := compareIPs(filePodIps, podIps) + + if !check { + return errors.Wrapf(errors.New("State file validation failed"), "for %s on node %s", checkType, nodes.Items[index].Name) + } + } + log.Printf("State file validation for %s passed", checkType) + return nil +} diff --git a/test/validate/utils.go b/test/validate/utils.go new file mode 100644 index 00000000000..7180c7bc666 --- /dev/null +++ b/test/validate/utils.go @@ -0,0 +1,39 @@ +package validate + +import ( + "context" + + "github.com/Azure/azure-container-networking/test/internal/k8sutils" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" +) + +func compareIPs(expected map[string]string, actual []string) bool { + if len(expected) != len(actual) { + return false + } + + for _, ip := range actual { + if _, ok := expected[ip]; !ok { + return false + } + } + + return true +} + +// func to get the pods ip without the node ip (ie. host network as false) +func getPodIPsWithoutNodeIP(ctx context.Context, clientset *kubernetes.Clientset, node corev1.Node) []string { + podsIpsWithoutNodeIP := []string{} + podIPs, err := k8sutils.GetPodsIpsByNode(ctx, clientset, "", "", node.Name) + if err != nil { + return podsIpsWithoutNodeIP + } + nodeIP := node.Status.Addresses[0].Address + for _, podIP := range podIPs { + if podIP != nodeIP { + podsIpsWithoutNodeIP = append(podsIpsWithoutNodeIP, podIP) + } + } + return podsIpsWithoutNodeIP +}
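
Note (not part of the diff): the validation flow added in test/validate walks every node, execs into the per-node pod (privileged, CNS, or Cilium agent), parses that component's state file into a map of IP -> owner, and compares it against the pod IPs the API server reports for the same node. A minimal, self-contained sketch of that final comparison step, reusing compareIPs exactly as introduced in test/validate/utils.go — the sample IPs and pod names below are hypothetical, for illustration only:

package main

import "fmt"

// compareIPs mirrors test/validate/utils.go: expected is keyed by IP (value is
// the owning pod or interface name taken from a state file), actual is the flat
// list of pod IPs the API server reports for the same node. The check is set
// equality: same count and every reported IP present in the state file.
func compareIPs(expected map[string]string, actual []string) bool {
	if len(expected) != len(actual) {
		return false
	}
	for _, ip := range actual {
		if _, ok := expected[ip]; !ok {
			return false
		}
	}
	return true
}

func main() {
	// Hypothetical state-file contents for one node.
	stateFileIPs := map[string]string{
		"10.241.0.12": "load-test-7c9d9",
		"10.241.0.13": "load-test-8f2k1",
	}
	// Matches: same IPs reported by the API server.
	fmt.Println(compareIPs(stateFileIPs, []string{"10.241.0.12", "10.241.0.13"})) // true
	// Fails: an API-server IP is missing from the state file, which is what
	// TestValidateState surfaces as a validation error for that component.
	fmt.Println(compareIPs(stateFileIPs, []string{"10.241.0.12", "10.241.0.99"})) // false
}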