# E2E K8s Tests (run #72): workflow file for this run
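# This workflow provisions AWS infrastructure with Terraform, bootstraps a simplyblock
# storage cluster and a k3s cluster, installs the spdk-csi Helm chart, runs the Python
# e2e suite against the deployment, reports results to Slack, and tears everything down.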
name: E2E K8s Tests
on:
  push:
    branches:
      - pre-release
      - sbcli
  schedule:
    - cron: '0 9 * * *' # Runs every day at 9 AM UTC
  workflow_dispatch:
    inputs:
      simplyBlockDeploy_branch:
        description: 'Branch for simplyBlockDeploy'
        required: true
        default: 'main'
      spdk_csi_branch:
        description: 'Branch for spdk-csi'
        required: true
        default: 'master'
      sbcli_cmd:
        description: 'Command for sbcli execution'
        required: true
        default: 'sbcli-dev'
      testname:
        description: 'Name of test to run. Empty to run all'
        required: false
        default: ''
      send_slack_notification:
        description: 'Send Slack notification?'
        required: false
        default: true
        type: boolean
      ndcs:
        description: 'Number of data chunks'
        required: false
        default: 1
      npcs:
        description: 'Number of parity chunks'
        required: false
        default: 1
      bs:
        description: 'Block size'
        required: false
        default: 4096
      chunk_bs:
        description: 'Chunk block size'
        required: false
        default: 4096
jobs:
  e2e:
    runs-on: self-hosted
    concurrency:
      group: ${{ github.workflow }}
      cancel-in-progress: false
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
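      # Scheduled and push runs always notify Slack; manual runs honor the
      # send_slack_notification input. The chosen value is written to GITHUB_ENV
      # so later steps can read it as env.send_slack_notification.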
      - name: Set send_slack_notification for scheduled and push events
        if: github.event_name == 'schedule' || github.event_name == 'push'
        run: echo "send_slack_notification=true" >> $GITHUB_ENV
      - name: Set send_slack_notification for manual workflow_dispatch
        if: github.event_name == 'workflow_dispatch'
        run: echo "send_slack_notification=${{ github.event.inputs.send_slack_notification }}" >> $GITHUB_ENV
      - uses: actions/setup-go@v5
        with:
          go-version: '1.22'
      - run: go version
      - name: Install Helm v3.15.4
        run: |
          curl -L https://get.helm.sh/helm-v3.15.4-linux-amd64.tar.gz -o helm-v3.15.4-linux-amd64.tar.gz
          tar -zxvf helm-v3.15.4-linux-amd64.tar.gz
          sudo mv linux-amd64/helm /usr/local/bin/helm
          helm version
      - name: Install kubectl v1.31.0
        run: |
          curl -LO "https://dl.k8s.io/release/v1.31.0/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/
          kubectl version --client
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}
      - uses: actions/checkout@master
        name: Checkout code simplyBlockDeploy
        with:
          repository: simplyblock-io/simplyBlockDeploy
          ref: refs/heads/${{ github.event.inputs.simplyBlockDeploy_branch || 'main' }}
          path: 'simplyBlockDeploy'
          token: ${{ secrets.GH_ACCESS_KEY_ID_HAMDI }}
      - uses: actions/checkout@master
        name: Checkout code spdk-csi
        with:
          repository: simplyblock-io/spdk-csi
          ref: refs/heads/${{ github.event.inputs.spdk_csi_branch || 'master' }}
          path: 'spdk-csi'
          token: ${{ secrets.GH_ACCESS_KEY_ID_HAMDI }}
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v2
        with:
          terraform_wrapper: false
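      # Terraform state lives in a shared S3 bucket with DynamoDB locking, so
      # concurrent runs cannot clobber each other's infrastructure state.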
      - name: Initialize Terraform
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          export TFSTATE_BUCKET=simplyblock-terraform-state-bucket
          export TFSTATE_KEY=csi
          export TFSTATE_REGION=us-east-2
          export TFSTATE_DYNAMODB_TABLE=terraform-up-and-running-locks
          terraform init -reconfigure \
            -backend-config="bucket=${TFSTATE_BUCKET}" \
            -backend-config="key=${TFSTATE_KEY}" \
            -backend-config="region=${TFSTATE_REGION}" \
            -backend-config="dynamodb_table=${TFSTATE_DYNAMODB_TABLE}" \
            -backend-config="encrypt=true"
      - name: Select or create workspace
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          terraform workspace select -or-create ghiaction-sbclie2ek8s
      - name: Validate Terraform Configuration
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          terraform validate
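      # SBCLI_CMD follows the branch: pre-release uses sbcli-pre, sbcli uses sbcli,
      # anything else falls back to the workflow_dispatch input (default sbcli-dev).
      # NDCS/NPCS/BS/CHUNK_BS default to 1/1/4096/4096 when no inputs are provided.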
      - name: Set parameters based on branch and input
        run: |
          if [[ "${GITHUB_REF##*/}" == *"pre-release"* ]]; then
            echo "Branch is pre-release. Setting SBCLI_CMD to sbcli-pre."
            echo "SBCLI_CMD=sbcli-pre" >> $GITHUB_ENV
          elif [[ "${GITHUB_REF##*/}" == "sbcli" ]]; then
            echo "Branch is sbcli. Setting SBCLI_CMD to sbcli."
            echo "SBCLI_CMD=sbcli" >> $GITHUB_ENV
          else
            echo "Setting SBCLI_CMD to input or default to sbcli-dev."
            echo "SBCLI_CMD=${{ github.event.inputs.sbcli_cmd || 'sbcli-dev' }}" >> $GITHUB_ENV
          fi
          echo "NDCS=${{ github.event.inputs.ndcs || 1 }}" >> $GITHUB_ENV
          echo "NPCS=${{ github.event.inputs.npcs || 1 }}" >> $GITHUB_ENV
          echo "BS=${{ github.event.inputs.bs || 4096 }}" >> $GITHUB_ENV
          echo "CHUNK_BS=${{ github.event.inputs.chunk_bs || 4096 }}" >> $GITHUB_ENV
        shell: bash
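      # The plan provisions 1 management node, 3 arm64 storage nodes (m6g.2xlarge,
      # 3 EBS volumes each) and 1 extra arm64 node (m6gd.xlarge) that later hosts k3s.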
      - name: Plan Terraform Changes
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          terraform plan \
            -var "mgmt_nodes=1" -var "storage_nodes=3" -var "volumes_per_storage_nodes=3" \
            -var storage_nodes_instance_type="m6g.2xlarge" \
            -var "storage_nodes_arch=arm64" -var "snode_deploy_on_k8s=true" \
            -var "extra_nodes=1" -var "extra_nodes_instance_type=m6gd.xlarge" \
            -var storage_nodes_ebs_size2=100 -var "region=us-east-2" \
            -var "extra_nodes_arch=arm64" \
            -var "sbcli_cmd=$SBCLI_CMD" -out=tfplan
        env:
          SBCLI_CMD: ${{ env.SBCLI_CMD }}
      - name: Apply Terraform Changes
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          terraform apply tfplan
      - name: Get Terraform Outputs
        id: terraform_outputs
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          # ::set-output is deprecated; write step outputs via $GITHUB_OUTPUT instead.
          output_bastion_public_ip=$(terraform output -raw bastion_public_ip)
          echo "bastion_public_ip=$output_bastion_public_ip" >> "$GITHUB_OUTPUT"
          output_key_name=$(terraform output -raw key_name)
          echo "key_name=$output_key_name" >> "$GITHUB_OUTPUT"
          output_extra_node_public_ip=$(terraform output -raw extra_nodes_public_ips)
          echo "extra_node_public_ip=$output_extra_node_public_ip" >> "$GITHUB_OUTPUT"
          output_mgmt_private_ip=$(terraform output -raw mgmt_private_ips)
          echo "mgmt_private_ip=$output_mgmt_private_ip" >> "$GITHUB_OUTPUT"
      - name: Bootstrap Cluster
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          ./bootstrap-cluster.sh --sbcli-cmd "$SBCLI_CMD" \
            --max-lvol 10 --max-snap 10 --max-prov 900G --number-of-devices 3 \
            --distr-ndcs $NDCS \
            --distr-npcs $NPCS \
            --distr-bs $BS \
            --distr-chunk-bs $CHUNK_BS \
            --k8s-snode
        id: bootstrap_cluster
        env:
          SBCLI_CMD: ${{ env.SBCLI_CMD }}
          NDCS: ${{ env.NDCS }}
          NPCS: ${{ env.NPCS }}
          BS: ${{ env.BS }}
          CHUNK_BS: ${{ env.CHUNK_BS }}
      - name: Bootstrap k3s
        run: |
          echo "Sleeping for 30 seconds before bootstrap-k3s"
          sleep 30
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          ./bootstrap-k3s.sh --k8s-snode
        id: bootstrap_k3s
      - name: Configure Kubeconfig
        run: |
          mkdir -p $HOME/.kube
          scp -i "$HOME/.ssh/${{ steps.terraform_outputs.outputs.key_name }}" ec2-user@${{ steps.terraform_outputs.outputs.extra_node_public_ip }}:/etc/rancher/k3s/k3s.yaml $HOME/.kube/config
          sed -i "s/server: https:\/\/\(localhost\|127.0.0.1\)\(:[0-9]*\)/server: https:\/\/${{ steps.terraform_outputs.outputs.extra_node_public_ip }}\2/" $HOME/.kube/config
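      # The spdk-csi chart is pointed at the freshly bootstrapped cluster using the
      # UUID, API gateway endpoint and secret emitted by the Bootstrap Cluster step.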
      - name: Install Helm Chart for spdk-csi
        run: |
          cd $GITHUB_WORKSPACE/spdk-csi/charts/latest/spdk-csi/
          echo "Sleeping for 30 seconds before helm install"
          sleep 30
          helm install spdk-csi ./ \
            --namespace spdk-csi \
            --create-namespace \
            --set csiConfig.simplybk.uuid="${{ steps.bootstrap_cluster.outputs.cluster_id }}" \
            --set csiConfig.simplybk.ip="${{ steps.bootstrap_cluster.outputs.cluster_api_gateway_endpoint }}" \
            --set csiSecret.simplybk.secret="${{ steps.bootstrap_cluster.outputs.cluster_secret }}" \
            --set logicalVolume.pool_name="testing1" \
            --set logicalVolume.snapshot="True" \
            --set image.spdkcsi.tag=master-arm64 \
            --set image.simplyblock.tag=main \
            --set storagenode.spdkImage=simplyblock/spdk:feature-journal-ha-latest
      # - name: Wait for Storage Nodes to Become Online
      #   run: |
      #     echo "Sleeping for 120 seconds before helm install"
      #     sleep 120
      #     while true; do
      #       echo "Checking storage node status..."
      #       NODE_STATUS=$(ssh -i "$HOME/.ssh/${{ steps.terraform_outputs.outputs.key_name }}" \
      #         -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
      #         -o ProxyCommand="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i \"$HOME/.ssh/${{ steps.terraform_outputs.outputs.key_name }}\" -W %h:%p ec2-user@${{ steps.terraform_outputs.outputs.bastion_public_ip }}" \
      #         ec2-user@${{ steps.terraform_outputs.outputs.mgmt_private_ip }} \
      #         "sbcli-dev sn list | grep -Eo 'online|offline'" || echo "SSH command failed")
      #       SSH_EXIT_CODE=$?
      #       if [ $SSH_EXIT_CODE -ne 0 ]; then
      #         echo "SSH command failed with exit code $SSH_EXIT_CODE. Retrying..."
      #         sleep 30
      #         continue
      #       fi
      #       if [ -z "$NODE_STATUS" ]; then
      #         echo "No output from sbcli-dev sn list. Retrying..."
      #         sleep 30
      #         continue
      #       fi
      #       echo "Node Status: $NODE_STATUS"
      #       ONLINE_NODES=$(echo "$NODE_STATUS" | grep -c "online")
      #       if [ "$ONLINE_NODES" -ge 3 ]; then
      #         echo "All 3 storage nodes are online."
      #         break
      #       else
      #         echo "$ONLINE_NODES nodes online. Waiting for more nodes to come online..."
      #         sleep 30
      #       fi
      #     done
      #   shell: /usr/bin/bash -e {0}
      - name: Sleep after Helm Installation
        run: sleep 300
      # - name: Activate Cluster
      #   run: |
      #     ssh -i "$HOME/.ssh/${{ steps.terraform_outputs.outputs.key_name }}" -o StrictHostKeyChecking=no \
      #       -o ProxyCommand="ssh -o StrictHostKeyChecking=no -i \"$HOME/.ssh/${{ steps.terraform_outputs.outputs.key_name }}\" -W %h:%p ec2-user@${{ steps.terraform_outputs.outputs.bastion_public_ip }}" \
      #       ec2-user@${{ steps.terraform_outputs.outputs.mgmt_private_ip }} "
      #       $SBCLI_CMD cluster activate ${{ steps.bootstrap_cluster.outputs.cluster_id }}
      #       "
      #   env:
      #     SBCLI_CMD: ${{ env.SBCLI_CMD }}
      - name: Record Test Start Time
        run: echo "TEST_START_TIME=$(date +%s)" >> $GITHUB_ENV
      - name: Setup Tests & Run Tests
        timeout-minutes: 120
        run: |
          cd $GITHUB_WORKSPACE/e2e
          sudo apt-get install -y python3.12-venv
          python3 -m venv myenv
          source myenv/bin/activate
          python3 -m pip install -r requirements.txt
          echo "Running tests in namespace ${{ steps.get-namespace.outputs.namespace }}"
          export CLUSTER_ID=${{ steps.bootstrap_cluster.outputs.cluster_id }}
          export CLUSTER_SECRET=${{ steps.bootstrap_cluster.outputs.cluster_secret }}
          export CLUSTER_IP=${{ steps.bootstrap_cluster.outputs.cluster_ip }}
          export API_BASE_URL=${{ steps.bootstrap_cluster.outputs.cluster_api_gateway_endpoint }}
          export BASTION_SERVER=${{ steps.terraform_outputs.outputs.bastion_public_ip }}
          export KEY_NAME=${{ steps.terraform_outputs.outputs.key_name }}
          export AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}
          export AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }}
          export AWS_REGION=${{ secrets.AWS_REGION }}
          export SBCLI_CMD=${SBCLI_CMD}
          TESTNAME=""
          if [ -n "${{ github.event.inputs.testname }}" ]; then
            TESTNAME="--testname ${{ github.event.inputs.testname }}"
          fi
          python3 e2e.py $TESTNAME \
            --ndcs $NDCS --npcs $NPCS --bs $BS --chunk_bs $CHUNK_BS \
            --run_k8s True
        env:
          SBCLI_CMD: ${{ env.SBCLI_CMD }}
          NDCS: ${{ env.NDCS }}
          NPCS: ${{ env.NPCS }}
          BS: ${{ env.BS }}
          CHUNK_BS: ${{ env.CHUNK_BS }}
      - name: Record Test End Time
        if: always()
        run: echo "TEST_END_TIME=$(date +%s)" >> $GITHUB_ENV
      - name: Calculate Total Time Taken
        if: always()
        run: |
          TEST_TIME=$(($TEST_END_TIME - $TEST_START_TIME))
          TEST_TIME_HOURS=$(($TEST_TIME / 3600))
          TEST_TIME_MINS=$((($TEST_TIME % 3600) / 60))
          TEST_TIME_SECS=$(($TEST_TIME % 60))
          echo "Test runtime: ${TEST_TIME_HOURS}h ${TEST_TIME_MINS}m ${TEST_TIME_SECS}s"
          echo "TEST_TIME_HOURS=$TEST_TIME_HOURS" >> $GITHUB_ENV
          echo "TEST_TIME_MINS=$TEST_TIME_MINS" >> $GITHUB_ENV
          echo "TEST_TIME_SECS=$TEST_TIME_SECS" >> $GITHUB_ENV
      - name: Upload automation and docker logs to s3
        if: always()
        run: |
          cd $GITHUB_WORKSPACE/e2e/logs
          ./upload_logs.sh
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          ./upload_docker_logs_to_s3.sh
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
          S3_BUCKET_NAME: "simplyblock-e2e-test-logs"
          RUN_ID: ${{ github.run_id }}
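      # The latest e2e log is parsed for totals and per-case results; the values are
      # exported through GITHUB_ENV (heredoc delimiters for the multi-line case lists)
      # so the Slack notification step can embed them in its message.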
      - name: Parse test results
        if: always() && (github.event_name == 'schedule' || env.send_slack_notification == 'true')
        id: parse_results
        run: |
          cd $GITHUB_WORKSPACE/e2e/logs
          echo "Looking for the latest non-empty log file..."
          # Find the latest non-empty log file
          COUNTER=0
          MAX_ATTEMPTS=10
          while [ $COUNTER -lt $MAX_ATTEMPTS ]; do
            LATEST_LOG=$(ls -t *.log | head -n 1)
            if [ -s "$LATEST_LOG" ]; then
              echo "Found non-empty log file: $LATEST_LOG"
              break
            fi
            echo "Attempt $((COUNTER + 1)): No non-empty log file found. Retrying..."
            COUNTER=$((COUNTER + 1))
            sleep 1 # Add a small delay to avoid rapid looping
          done
          if [ ! -s "$LATEST_LOG" ]; then
            echo "No non-empty log file found after $MAX_ATTEMPTS attempts"
            exit 1
          fi
          echo "Parsing the identified log file: $LATEST_LOG"
          # Parse the identified log file
          echo "Total tests"
          TOTAL_TESTS=$(grep -i "Number of Total Cases" "$LATEST_LOG" | awk '{print $NF}')
          echo "number Passed tests"
          PASSED_TESTS=$(grep -i "Number of Passed Cases" "$LATEST_LOG" | awk '{print $NF}')
          echo "number Failed tests"
          FAILED_TESTS=$(grep -i "Number of Failed Cases" "$LATEST_LOG" | awk '{print $NF}')
          echo "List Passed tests"
          PASSED_CASES=$(grep "PASSED CASE" "$LATEST_LOG" | awk -F 'INFO - | FAILED CASE' '{print $2}')
          echo "List Failed tests"
          FAILED_CASES=$(grep "FAILED CASE" "$LATEST_LOG" | awk -F 'INFO - | FAILED CASE' '{print $2}')
          # Format passed and failed cases as bullet points
          echo "Adding PASSED cases with bullets: $PASSED_CASES"
          echo "Adding FAILED cases with bullets: $FAILED_CASES"
          PASSED_CASES_BULLETS=$(echo "$PASSED_CASES" | awk '{printf " • %s\n", $0}')
          FAILED_CASES_BULLETS=$(echo "$FAILED_CASES" | awk '{printf " • %s\n", $0}')
          echo "PASSED cases with bullets: $PASSED_CASES_BULLETS"
          echo "FAILED cases with bullets: $FAILED_CASES_BULLETS"
          echo "TOTAL_TESTS=${TOTAL_TESTS}"
          echo "PASSED_TESTS=${PASSED_TESTS}"
          echo "FAILED_TESTS=${FAILED_TESTS}"
          echo "PASSED_CASES=${PASSED_CASES}"
          echo "FAILED_CASES=${FAILED_CASES}"
          echo "PASSED_TESTS=${PASSED_TESTS}" >> $GITHUB_ENV
          echo "FAILED_TESTS=${FAILED_TESTS}" >> $GITHUB_ENV
          echo "TOTAL_TESTS=${TOTAL_TESTS}" >> $GITHUB_ENV
          echo "PASSED_CASES<<EOF" >> $GITHUB_ENV
          echo "${PASSED_CASES}" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
          echo "FAILED_CASES<<EOF" >> $GITHUB_ENV
          echo "${FAILED_CASES}" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
          echo "PASSED_CASES_BULLETS<<EOF" >> $GITHUB_ENV
          echo "${PASSED_CASES_BULLETS}" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
          echo "FAILED_CASES_BULLETS<<EOF" >> $GITHUB_ENV
          echo "${FAILED_CASES_BULLETS}" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
      - name: Send Slack Notification
        if: always() && (github.event_name == 'schedule' || env.send_slack_notification == 'true')
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          GITHUB_SERVER_URL: ${{ github.server_url }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          S3_BUCKET_NAME: "simplyblock-e2e-test-logs"
          RUN_ID: ${{ github.run_id }}
          PASSED_TESTS: ${{ env.PASSED_TESTS }}
          FAILED_TESTS: ${{ env.FAILED_TESTS }}
          TOTAL_TESTS: ${{ env.TOTAL_TESTS }}
          PASSED_CASES: ${{ env.PASSED_CASES }}
          FAILED_CASES: ${{ env.FAILED_CASES }}
          PASSED_CASES_BULLETS: ${{ env.PASSED_CASES_BULLETS }}
          FAILED_CASES_BULLETS: ${{ env.FAILED_CASES_BULLETS }}
          BRANCH_NAME: ${{ github.ref_name }}
          TEST_TIME_HOURS: ${{ env.TEST_TIME_HOURS }}
          TEST_TIME_MINS: ${{ env.TEST_TIME_MINS }}
          TEST_TIME_SECS: ${{ env.TEST_TIME_SECS }}
          NDCS: ${{ env.NDCS }}
          NPCS: ${{ env.NPCS }}
          CHUNK_BS: ${{ env.CHUNK_BS }}
          BS: ${{ env.BS }}
        run: |
          GITHUB_RUN_URL="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}"
          AWS_LOGS_URL="https://s3.console.aws.amazon.com/s3/buckets/${S3_BUCKET_NAME}?prefix=${RUN_ID}/&region=us-east-2"
          if [[ ${{ job.status }} == 'success' ]]; then
            OVERALL_STATUS=":white_check_mark: Overall Status: SUCCESS"
          else
            OVERALL_STATUS=":x: Overall Status: FAILURE"
          fi
          TIME_TAKEN="${TEST_TIME_HOURS}h ${TEST_TIME_MINS}m ${TEST_TIME_SECS}s"
          MESSAGE="Python E2E *K8s* tests run triggered on branch *${BRANCH_NAME}*. \nTotal Time Taken to run the tests: ${TIME_TAKEN}. \n\n${OVERALL_STATUS}\nGitHub Run: ${GITHUB_RUN_URL}\nAWS Logs: ${AWS_LOGS_URL}\n\n*Configuration*: *NDCS: ${NDCS}, NPCS: ${NPCS}, Block Size: ${BS}, Chunk Block Size: ${CHUNK_BS}*\n\nTotal Tests: *${TOTAL_TESTS}*, Passed Tests: *${PASSED_TESTS}*, Failed Tests: *${FAILED_TESTS}*\n\n-- Test Cases Passed :white_check_mark:\n${PASSED_CASES_BULLETS}\n\n-- Test Cases Failed :x:\n${FAILED_CASES_BULLETS}"
          curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"${MESSAGE}\"}" $SLACK_WEBHOOK_URL
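      # The Terraform environment is destroyed unconditionally (if: always());
      # the final step then clears the runner workspace and the copied kubeconfig.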
      - name: Destroy Cluster
        if: always()
        run: |
          cd $GITHUB_WORKSPACE/simplyBlockDeploy
          terraform destroy --auto-approve
      - name: Cleanup build folder
        run: |
          ls -la ./
          rm -rf ./* || true
          rm -rf ./.??* || true
          ls -la ./
          rm -rf $HOME/.kube