diff --git a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml index cdd388091d..2323b6c689 100644 --- a/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml +++ b/.pipelines/cni/cilium/cilium-overlay-load-test-template.yaml @@ -84,6 +84,7 @@ stages: os: linux iterations: ${ITERATIONS_CILIUM} nodeCount: ${{ parameters.nodeCount }} + cni: cilium - template: ../load-test-templates/validate-state-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) @@ -94,8 +95,10 @@ stages: - template: ../load-test-templates/restart-node-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} nodeCount: ${{ parameters.nodeCount }} scaleup: ${SCALEUP_CILIUM} + cni: cilium - template: ../load-test-templates/validate-state-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) @@ -108,8 +111,10 @@ stages: - template: ../load-test-templates/restart-cns-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} scaleup: ${SCALEUP_CILIUM} nodeCount: ${{ parameters.nodeCount }} + cni: cilium - job: cni_tests displayName: "Cilium Test" dependsOn: restart_cns @@ -141,4 +146,18 @@ stages: retryCountOnTaskFailure: 6 name: "CiliumConnectivityTests" displayName: "Run Cilium Connectivity Tests" - + - job: logs + displayName: "Log Failure" + dependsOn: + - deploy_cilium_components + - deploy_pods + - restart_nodes + - restart_cns + - cni_tests + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: linux + cni: cilium diff --git a/.pipelines/cni/cilium/nightly-release-test.yml b/.pipelines/cni/cilium/nightly-release-test.yml index d8bd5c4d5e..da5c668c93 100644 --- a/.pipelines/cni/cilium/nightly-release-test.yml +++ b/.pipelines/cni/cilium/nightly-release-test.yml @@ 
-88,11 +88,23 @@ stages: name: "cilium_nightly" testDropgz: "" clusterName: ciliumnightly-$(commitID) + - job: logs + displayName: "Log Failure" + dependsOn: + - cilium_nightly + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ciliumnightly-$(commitID) + os: linux + cni: cilium - job: delete displayName: Delete Cluster condition: always() dependsOn: - cilium_nightly + - logs pool: name: "$(BUILD_POOL_NAME_DEFAULT)" strategy: diff --git a/.pipelines/cni/load-test-templates/pod-deployment-template.yaml b/.pipelines/cni/load-test-templates/pod-deployment-template.yaml index b21e0438cf..33d2fd60a0 100644 --- a/.pipelines/cni/load-test-templates/pod-deployment-template.yaml +++ b/.pipelines/cni/load-test-templates/pod-deployment-template.yaml @@ -4,6 +4,7 @@ parameters: os: "" iterations: 4 nodeCount: 10 + cni: "" steps: - task: AzureCLI@1 @@ -20,3 +21,10 @@ steps: cd test/integration/load scale=$(( ${{ parameters.scaleup }} * ${{ parameters.nodeCount }} )) ITERATIONS=${{ parameters.iterations }} SCALE_UP=$scale OS_TYPE=${{ parameters.os }} go test -timeout 30m -tags load -run ^TestLoad$ + + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + logType: scaleTest + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} diff --git a/.pipelines/cni/load-test-templates/restart-cns-template.yaml b/.pipelines/cni/load-test-templates/restart-cns-template.yaml index 7c9e7c6599..a976b29cea 100644 --- a/.pipelines/cni/load-test-templates/restart-cns-template.yaml +++ b/.pipelines/cni/load-test-templates/restart-cns-template.yaml @@ -3,6 +3,7 @@ parameters: cni: "cilium" scaleup: 100 nodeCount: 10 + os: "" steps: - task: AzureCLI@1 @@ -32,3 +33,10 @@ steps: name: "restartCNS" displayName: "Restart CNS and Validate pods" retryCountOnTaskFailure: 3 + + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + logType: 
restartCNS + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} diff --git a/.pipelines/cni/load-test-templates/restart-node-template.yaml b/.pipelines/cni/load-test-templates/restart-node-template.yaml index a1c218867d..2e8c050f9d 100644 --- a/.pipelines/cni/load-test-templates/restart-node-template.yaml +++ b/.pipelines/cni/load-test-templates/restart-node-template.yaml @@ -3,6 +3,7 @@ parameters: nodeCount: 10 scaleup: 100 os: "linux" + cni: "" steps: - task: AzureCLI@1 @@ -30,3 +31,10 @@ steps: REPLICAS=$scale OS_TYPE=${{ parameters.os }} go test -count 1 -timeout 30m -tags load -run ^TestScaleDeployment$ name: "RestartNodes" displayName: "Restart Nodes" + + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }} + logType: restartNode + os: ${{ parameters.os }} + cni: ${{ parameters.cni }} diff --git a/.pipelines/cni/singletenancy/cniv1-template.yaml b/.pipelines/cni/singletenancy/cniv1-template.yaml index 5f7d1c54e2..d6ca60de9e 100644 --- a/.pipelines/cni/singletenancy/cniv1-template.yaml +++ b/.pipelines/cni/singletenancy/cniv1-template.yaml @@ -105,6 +105,7 @@ stages: os: ${{ parameters.os }} iterations: ${{ parameters.iterations }} nodeCount: ${{ parameters.nodeCount }} + cni: cniv1 - template: ../load-test-templates/validate-state-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) @@ -120,6 +121,7 @@ stages: os: ${{ parameters.os }} nodeCount: ${{ parameters.nodeCount }} scaleup: ${{ parameters.scaleup }} + cni: cniv1 - template: ../load-test-templates/validate-state-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) @@ -156,6 +158,22 @@ stages: hybridWin: true service: true hostport: true + - job: logs + displayName: "Log Failure" + dependsOn: + - update_cni + - npm_k8se2e + - deploy_pods + - restart_nodes + - recover + - cni_${{ parameters.os }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + 
clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv1 - ${{ if eq(parameters.os, 'windows') }}: - stage: ${{ parameters.name }}_HNS @@ -240,3 +258,18 @@ stages: hybridWin: true service: true hostport: true + - job: logs + displayName: "Log Failure" + dependsOn: + - restart_hns + - restart_nodes + - deploy_pods + - recover + - cni_${{ parameters.os }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv1 diff --git a/.pipelines/cni/singletenancy/linux-cniv2-template.yaml b/.pipelines/cni/singletenancy/linux-cniv2-template.yaml index e15415df9d..8f58fd8be4 100644 --- a/.pipelines/cni/singletenancy/linux-cniv2-template.yaml +++ b/.pipelines/cni/singletenancy/linux-cniv2-template.yaml @@ -108,6 +108,7 @@ stages: os: ${{ parameters.os }} iterations: ${ITERATIONS_LINUX} nodeCount: ${{ parameters.nodeCount }} + cni: cniv2 - template: ../load-test-templates/validate-state-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) @@ -120,8 +121,10 @@ stages: - template: ../load-test-templates/restart-node-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} nodeCount: ${{ parameters.nodeCount }} scaleup: ${SCALEUP_LINUX} + cni: cniv2 - template: ../load-test-templates/validate-state-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) @@ -135,6 +138,7 @@ stages: - template: ../load-test-templates/restart-cns-template.yaml parameters: clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} cni: cniv2 scaleup: ${SCALEUP_LINUX} nodeCount: ${{ parameters.nodeCount }} @@ -167,4 +171,20 @@ stages: portforward: true service: true hostport: true - + - job: logs + displayName: "Log Failure" + dependsOn: + - integration + - npm_k8se2e + - deploy_pods + - restart_nodes + - restart_cns + - recover + - 
cni_${{ parameters.os }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 diff --git a/.pipelines/npm/npm-cni-integration-test.yaml b/.pipelines/npm/npm-cni-integration-test.yaml index 466d3ede8b..6aa49f92a3 100644 --- a/.pipelines/npm/npm-cni-integration-test.yaml +++ b/.pipelines/npm/npm-cni-integration-test.yaml @@ -126,9 +126,8 @@ jobs: continueOnError: ${{ parameters.continueOnError }} - bash: | - npmLogs=$(System.DefaultWorkingDirectory)/Attempt_#$(System.StageAttempt)/npmLogs_${{ parameters.clusterName }} + npmLogs=$(System.DefaultWorkingDirectory)/${{ parameters.clusterName }}_npmLogs_Attempt_#$(System.StageAttempt) mkdir -p $npmLogs - cp ./kubeconfig $npmLogs/kubeconfig kubectl get pods -n kube-system | grep npm npmPodList=`kubectl get pods -n kube-system | grep npm | awk '{print $1}'` @@ -139,8 +138,8 @@ jobs: displayName: Generate NPM pod logs condition: always() - - publish: $(System.DefaultWorkingDirectory)/Attempt_#$(System.StageAttempt)/npmLogs_${{ parameters.clusterName }} + - publish: $(System.DefaultWorkingDirectory)/${{ parameters.clusterName }}_npmLogs_Attempt_#$(System.StageAttempt) condition: always() - artifact: npmLogs_${{ parameters.clusterName }} + artifact: ${{ parameters.clusterName }}_npmLogs_Attempt_#$(System.StageAttempt) name: npmLogs displayName: Publish NPM pod logs diff --git a/.pipelines/singletenancy/aks-swift/e2e-job-template.yaml b/.pipelines/singletenancy/aks-swift/e2e-job-template.yaml index 220c69cddf..d752b020f1 100644 --- a/.pipelines/singletenancy/aks-swift/e2e-job-template.yaml +++ b/.pipelines/singletenancy/aks-swift/e2e-job-template.yaml @@ -60,3 +60,15 @@ stages: name: ${{ parameters.name }} testDropgz: ${{ parameters.testDropgz }} clusterName: ${{ parameters.clusterName }}-$(commitID) + + - job: logs + displayName: "Log Failure" + dependsOn: + - ${{ parameters.name }} + 
condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 diff --git a/.pipelines/singletenancy/aks/e2e-job-template.yaml b/.pipelines/singletenancy/aks/e2e-job-template.yaml index 5cdb054c16..19d66e047e 100644 --- a/.pipelines/singletenancy/aks/e2e-job-template.yaml +++ b/.pipelines/singletenancy/aks/e2e-job-template.yaml @@ -79,3 +79,16 @@ stages: service: true hostport: true dependsOn: ${{ parameters.name }} + + - job: logs + displayName: "Log Failure" + dependsOn: + - ${{ parameters.name }} + - cni_${{ parameters.os }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv1 diff --git a/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-job-template.yaml b/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-job-template.yaml index 389a21f617..5399ee2464 100644 --- a/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-job-template.yaml +++ b/.pipelines/singletenancy/azure-cni-overlay/azure-cni-overlay-e2e-job-template.yaml @@ -57,3 +57,15 @@ stages: name: ${{ parameters.name }} testDropgz: ${{ parameters.testDropgz }} clusterName: ${{ parameters.clusterName }}-$(commitID) + + - job: logs + displayName: "Log Failure" + dependsOn: + - ${{ parameters.name }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cniv2 diff --git a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-job-template.yaml b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-job-template.yaml index 66562ac2a5..06e4351cf8 100644 --- a/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-job-template.yaml +++ 
b/.pipelines/singletenancy/cilium-overlay/cilium-overlay-e2e-job-template.yaml @@ -56,3 +56,15 @@ stages: name: ${{ parameters.name }} testDropgz: ${{ parameters.testDropgz }} clusterName: ${{ parameters.clusterName }}-$(commitID) + + - job: logs + displayName: "Log Failure" + dependsOn: + - ${{ parameters.name }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cilium diff --git a/.pipelines/singletenancy/cilium/cilium-e2e-job-template.yaml b/.pipelines/singletenancy/cilium/cilium-e2e-job-template.yaml index 357b31caa2..eade902955 100644 --- a/.pipelines/singletenancy/cilium/cilium-e2e-job-template.yaml +++ b/.pipelines/singletenancy/cilium/cilium-e2e-job-template.yaml @@ -60,3 +60,15 @@ stages: name: ${{ parameters.name }} testDropgz: ${{ parameters.testDropgz }} clusterName: ${{ parameters.clusterName }}-$(commitID) + + - job: logs + displayName: "Log Failure" + dependsOn: + - ${{ parameters.name }} + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ parameters.os }} + cni: cilium diff --git a/.pipelines/singletenancy/dualstack-overlay/dualstackoverlay-e2e-job-template.yaml b/.pipelines/singletenancy/dualstack-overlay/dualstackoverlay-e2e-job-template.yaml index 5d2a1e1866..7623abbc56 100644 --- a/.pipelines/singletenancy/dualstack-overlay/dualstackoverlay-e2e-job-template.yaml +++ b/.pipelines/singletenancy/dualstack-overlay/dualstackoverlay-e2e-job-template.yaml @@ -80,3 +80,17 @@ stages: portforward: true service: true hybridWin: true + + - job: logs + displayName: "Log Failure" + dependsOn: + - ${{ parameters.name }} + - cni_${{ parameters.os }} + - cni_windows + condition: failed() + steps: + - template: ../../templates/log-template.yaml + parameters: + clusterName: ${{ parameters.clusterName }}-$(commitID) + os: ${{ 
parameters.os }} + cni: cniv2 diff --git a/.pipelines/templates/log-template.yaml b/.pipelines/templates/log-template.yaml new file mode 100644 index 0000000000..ce9541eb43 --- /dev/null +++ b/.pipelines/templates/log-template.yaml @@ -0,0 +1,274 @@ +# -- Captures -- +# CNS, CNI, and Cilium Logs +# CNS, CNI, and Cilium State files +# Daemonset and Deployment Images +# Node Status +# kube-system namespace logs +# Non-ready pods on failure +# -- Controlled by -- +# CNI and OS | ${{ parameters.cni }} and ${{ parameters.os }} +# CNS ConfigMap | "ManageEndpointState" +# -- Generates -- +# Logs on a per-node basis +# Outputs a singular unique artifact per template call | ${{ parameters.clusterName }}_${{ parameters.logType }}_Attempt_#$(System.StageAttempt) +# Each artifact is divided into sub-directories +# -- Intent -- +# Provide thorough debugging information to understand why CNI test scenarios are failing without having to blindly reproduce + +parameters: + clusterName: "" + logType: "failure" + os: "" + cni: "" + +steps: + - task: AzureCLI@1 + inputs: + azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) + scriptLocation: "inlineScript" + scriptType: "bash" + addSpnToEnvironment: true + inlineScript: | + make -C ./hack/aks set-kubeconf AZCLI=az CLUSTER=${{ parameters.clusterName }} + + acnLogs=$(System.DefaultWorkingDirectory)/${{ parameters.clusterName }}_${{ parameters.logType }}_Attempt_#$(System.StageAttempt) + mkdir -p $acnLogs + echo "Root Directory created: $acnLogs" + echo "##vso[task.setvariable variable=acnLogs]$acnLogs" + + kubectl get pods -n kube-system -owide + podList=`kubectl get pods -n kube-system --no-headers | awk '{print $1}'` + mkdir -p $acnLogs/kube-system + echo "Directory created: $acnLogs/kube-system" + for pod in $podList; do + kubectl logs -n kube-system $pod > $acnLogs/kube-system/$pod-logs.txt + echo "$acnLogs/kube-system/$pod-logs.txt" + done + displayName: Kube-System Logs + condition: always() + continueOnError: true # Tends to 
fail after node restart due to pods still restarting. This should not block other tests or logs from running. + + - bash: | + kubectl describe nodes + displayName: Node Status + condition: always() + + - bash: | + kubectl get ds -A -owide + echo "Capture daemonset images being used" + dsList=`kubectl get ds -A | grep kube-system | awk '{print $2}'` + for ds in $dsList; do + echo "$ds" + kubectl describe ds -n kube-system $ds | grep Image + done + displayName: Daemonset Images + condition: always() + + - bash: | + kubectl get deploy -A -owide + echo "Capture deployment images being used" + deployList=`kubectl get deploy -A | grep kube-system | awk '{print $2}'` + for deploy in $deployList; do + echo "$deploy" + kubectl describe deploy -n kube-system $deploy | grep Image + done + displayName: Deployment Images + condition: always() + + - ${{ if eq(parameters.os, 'linux') }}: + - bash: | + echo "Ensure that privileged pod exists on each node" + kubectl apply -f test/integration/manifests/load/privileged-daemonset.yaml + kubectl rollout status ds -n kube-system privileged-daemonset + + echo "------ Log work ------" + kubectl get pods -n kube-system -l os=linux,app=privileged-daemonset -owide + echo "Capture logs from each linux node. Files located in var/logs/*." 
+ podList=`kubectl get pods -n kube-system -l os=linux,app=privileged-daemonset -owide --no-headers | awk '{print $1}'` + for pod in $podList; do + index=0 + files=(`kubectl exec -i -n kube-system $pod -- find ./var/log -maxdepth 2 -name "azure-*" -type f`) + fileBase=(`kubectl exec -i -n kube-system $pod -- find ./var/log -maxdepth 2 -name "azure-*" -type f -printf "%f\n"`) + + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/log-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/" + + for file in ${files[*]}; do + kubectl exec -i -n kube-system $pod -- cat $file > $(acnLogs)/"$node"_logs/log-output/${fileBase[$index]} + echo "Azure-*.log, ${fileBase[$index]}, captured: $(acnLogs)/"$node"_logs/log-output/${fileBase[$index]}" + ((index++)) + done + if [ ${{ parameters.cni }} = 'cilium' ]; then + file="cilium-cni.log" + kubectl exec -i -n kube-system $pod -- cat var/log/$file > $(acnLogs)/"$node"_logs/log-output/$file + echo "Cilium log, $file, captured: $(acnLogs)/"$node"_logs/log-output/$file" + fi + done + + if ! 
[ ${{ parameters.cni }} = 'cilium' ]; then + echo "------ Privileged work ------" + kubectl get pods -n kube-system -l os=linux,app=privileged-daemonset -owide + echo "Capture State Files from privileged pods" + for pod in $podList; do + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/privileged-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/privileged-output/" + + file="azure-vnet.json" + kubectl exec -i -n kube-system $pod -- cat /var/run/$file > $(acnLogs)/"$node"_logs/privileged-output/$file + echo "CNI State, $file, captured: $(acnLogs)/"$node"_logs/privileged-output/$file" + if [ ${{ parameters.cni }} = 'cniv1' ]; then + file="azure-vnet-ipam.json" + kubectl exec -i -n kube-system $pod -- cat /var/run/$file > $(acnLogs)/"$node"_logs/privileged-output/$file + echo "CNIv1 IPAM, $file, captured: $(acnLogs)/"$node"_logs/privileged-output/$file" + fi + done + fi + + if [ ${{ parameters.cni }} = 'cilium' ] || [ ${{ parameters.cni }} = 'cniv2' ]; then + echo "------ CNS work ------" + + + kubectl get pods -n kube-system -l k8s-app=azure-cns + echo "Capture State Files from CNS pods" + cnsPods=`kubectl get pods -n kube-system -l k8s-app=azure-cns --no-headers | awk '{print $1}'` + for pod in $cnsPods; do + managed=`kubectl exec -i -n kube-system $pod -- cat etc/azure-cns/cns_config.json | jq .ManageEndpointState` + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/CNS-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/CNS-output/" + + file="cnsCache.txt" + kubectl exec -i -n kube-system $pod -- curl localhost:10090/debug/ipaddresses -d {\"IPConfigStateFilter\":[\"Assigned\"]} > $(acnLogs)/"$node"_logs/CNS-output/$file + echo "CNS cache, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" + + 
file="azure-cns.json" + kubectl exec -i -n kube-system $pod -- cat /var/lib/azure-network/$file > $(acnLogs)/"$node"_logs/CNS-output/$file + echo "CNS State, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" + if [ $managed = "true" ]; then + file="azure-endpoints.json" + kubectl exec -i -n kube-system $pod -- cat /var/run/azure-cns/$file > $(acnLogs)/"$node"_logs/CNS-output/$file + echo "CNS Managed State, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" + fi + done + fi + + if [ ${{ parameters.cni }} = 'cilium' ]; then + echo "------ Cilium work ------" + kubectl get pods -n kube-system -l k8s-app=cilium + echo "Capture State Files from Cilium pods" + ciliumPods=`kubectl get pods -n kube-system -l k8s-app=cilium --no-headers | awk '{print $1}'` + for pod in $ciliumPods; do + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/Cilium-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/Cilium-output/" + + file="cilium-endpoint.json" + kubectl exec -i -n kube-system $pod -- cilium endpoint list -o json > $(acnLogs)/"$node"_logs/Cilium-output/$file + echo "Cilium, $file, captured: $(acnLogs)/"$node"_logs/Cilium-output/$file" + done + fi + displayName: Linux Logs + condition: always() + + - ${{ if eq(parameters.os, 'windows') }}: + - bash: | + echo "Ensure that privileged pod exists on each node" + kubectl apply -f test/integration/manifests/load/privileged-daemonset-windows.yaml + kubectl rollout status ds -n kube-system privileged-daemonset + + echo "------ Log work ------" + kubectl get pods -n kube-system -l os=windows,app=privileged-daemonset -owide + echo "Capture logs from each windows node. 
Files located in \k" + podList=`kubectl get pods -n kube-system -l os=windows,app=privileged-daemonset -owide --no-headers | awk '{print $1}'` + for pod in $podList; do + files=`kubectl exec -i -n kube-system $pod -- powershell "ls ../../k/azure*.log*" | grep azure | awk '{print $6}'` + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/log-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/log-output/" + + for file in $files; do + kubectl exec -i -n kube-system $pod -- powershell "cat ../../k/$file" > $(acnLogs)/"$node"_logs/log-output/$file + echo "Azure-*.log, $file, captured: $(acnLogs)/"$node"_logs/log-output/$file" + done + if [ ${{ parameters.cni }} = 'cniv2' ]; then + file="azure-cns.log" + kubectl exec -i -n kube-system $pod -- cat k/azurecns/$file > $(acnLogs)/"$node"_logs/log-output/$file + echo "CNS Log, $file, captured: $(acnLogs)/"$node"_logs/log-output/$file" + fi + done + + echo "------ Privileged work ------" + kubectl get pods -n kube-system -l os=windows,app=privileged-daemonset -owide + echo "Capture State Files from privileged pods" + for pod in $podList; do + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/privileged-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/privileged-output/" + + file="azure-vnet.json" + kubectl exec -i -n kube-system $pod -- powershell cat ../../k/$file > $(acnLogs)/"$node"_logs/privileged-output/$file + echo "CNI State, $file, captured: $(acnLogs)/"$node"_logs/privileged-output/$file" + if [ ${{ parameters.cni }} = 'cniv1' ]; then + file="azure-vnet-ipam.json" + kubectl exec -i -n kube-system $pod -- powershell cat ../../k/$file > $(acnLogs)/"$node"_logs/privileged-output/$file + echo "CNI IPAM, $file, captured: $(acnLogs)/"$node"_logs/privileged-output/$file" + fi 
+ done + + if [ ${{ parameters.cni }} = 'cniv2' ]; then + echo "------ CNS work ------" + + + kubectl get pods -n kube-system -l k8s-app=azure-cns-win --no-headers + echo "Capture State Files from CNS pods" + cnsPods=`kubectl get pods -n kube-system -l k8s-app=azure-cns-win --no-headers | awk '{print $1}'` + for pod in $cnsPods; do + managed=`kubectl exec -i -n kube-system $pod -- powershell cat etc/azure-cns/cns_config.json | jq .ManageEndpointState` + node=`kubectl get pod -n kube-system $pod -o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers | awk '{print $1}'` + mkdir -p $(acnLogs)/"$node"_logs/CNS-output/ + echo "Directory created: $(acnLogs)/"$node"_logs/CNS-output/" + + file="cnsCache.txt" + kubectl exec -i -n kube-system $pod -- powershell curl localhost:10090/debug/ipaddresses -d {\"IPConfigStateFilter\":[\"Assigned\"]} > $(acnLogs)/"$node"_logs/CNS-output/$file + echo "CNS cache, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" + + file="azure-cns.json" + kubectl exec -i -n kube-system $pod -- powershell cat k/azurecns/azure-cns.json > $(acnLogs)/"$node"_logs/CNS-output/$file + echo "CNS State, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" + if [ "$managed" = "true" ]; then + file="azure-endpoints.json" + kubectl exec -i -n kube-system $pod -- cat k/azurecns/$file > $(acnLogs)/"$node"_logs/CNS-output/$file + echo "CNS Managed State, $file, captured: $(acnLogs)/"$node"_logs/CNS-output/$file" + fi + done + fi + displayName: Windows Logs + condition: always() + + - ${{ if eq(parameters.logType, 'failure') }}: + - bash: | + kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName + podList=`kubectl get pods -n kube-system -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,NODE:.spec.nodeName --no-headers | grep -v Running | awk '{print $1}'` + array=($podList) + + if [ -z ${array[0]} ]; then + echo There are no kube-system pods in a non-ready state. 
+ else + mkdir -p $(acnLogs)/non-ready + echo Directory created: $(acnLogs)/non-ready + echo Capturing failed pods + for pod in $podList; do + kubectl describe pod -n kube-system $pod > $(acnLogs)/non-ready/$pod.txt + echo $(acnLogs)/non-ready/$pod.txt + done + fi + displayName: Failure Logs + condition: always() + + - publish: $(System.DefaultWorkingDirectory)/${{ parameters.clusterName }}_${{ parameters.logType }}_Attempt_#$(System.StageAttempt) + condition: always() + artifact: ${{ parameters.clusterName }}_${{ parameters.logType }}_Attempt_#$(System.StageAttempt) + name: acnLogs_${{ parameters.logType }} + displayName: Publish Cluster logs diff --git a/test/integration/manifests/load/privileged-daemonset.yaml b/test/integration/manifests/load/privileged-daemonset.yaml index 6448f56980..7f4d370b66 100644 --- a/test/integration/manifests/load/privileged-daemonset.yaml +++ b/test/integration/manifests/load/privileged-daemonset.yaml @@ -5,14 +5,17 @@ metadata: namespace: kube-system labels: app: privileged-daemonset + os: linux spec: selector: matchLabels: app: privileged-daemonset + os: linux template: metadata: labels: app: privileged-daemonset + os: linux spec: hostNetwork: true hostPID: true @@ -24,6 +27,8 @@ spec: privileged: true runAsUser: 0 volumeMounts: + - mountPath: /var/log + name: log - mountPath: /var/run/azure-cns name: azure-cns - mountPath: /var/run/azure-network @@ -32,7 +37,12 @@ spec: name: host-root - mountPath: /var/run name: azure-cns-noncilium + - name: cns-state + mountPath: /var/lib/azure-network volumes: + - name: log + hostPath: + path: /var/log - name: azure-cns hostPath: path: /var/run/azure-cns @@ -42,9 +52,12 @@ spec: - name: azure-cns-noncilium hostPath: path: /var/run + - name: cns-state + hostPath: + path: /var/lib/azure-network - hostPath: path: / type: "" name: host-root nodeSelector: - kubernetes.io/os: linux \ No newline at end of file + kubernetes.io/os: linux