# feat: Enhance CI/CD (Build, E2E, Composite Action) with 1ES Migration and Phi-3 Integration #14
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name.
name: Build and Push Preset Models 1ES

# Runs are grouped per workflow and PR head branch; when there is no head_ref
# (push / manual dispatch) the run id is used, so each such run gets its own
# group. A newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Triggers: PRs targeting main and pushes to main that touch the inference
# presets or the supported-models list, plus a manual dispatch.
on:
  pull_request:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  push:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  workflow_dispatch:
    inputs:
      # Forwarded to the model-selection script as FORCE_RUN_ALL.
      force-run-all:
        type: boolean
        default: false
        description: "Run all models for build"
# Workflow-wide environment, visible to every step of every job.
env:
  # NOTE(review): not referenced by any step shown in this workflow —
  # confirm whether it is still needed before removing.
  GO_VERSION: "1.22"
  # PR head branch when present, otherwise the ref that triggered the run.
  # head_ref is chosen by the PR author, so treat this value as untrusted:
  # read it as "$BRANCH_NAME" in shell, never via ${{ }} inside a script.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Passed to the build script; same path as the mount the build job checks.
  WEIGHTS_DIR: "/mnt/storage"
# GITHUB_TOKEN scopes granted to every job in this workflow.
permissions:
  # Allows jobs to request an OIDC token.
  id-token: write
  # NOTE(review): no step shown in this workflow writes to the repository;
  # confirm whether `read` would be sufficient.
  contents: write
jobs:
  # Decides which models need a build and publishes them as a JSON matrix.
  determine-models:
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      matrix: ${{ steps.affected_models.outputs.matrix }}
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
    steps:
      - name: Checkout
        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29  # v4.1.6
        with:
          submodules: true
          fetch-depth: 0

      # The expression renders to the literal `true` or `false`, so it is safe
      # to expand inline.
      - name: Set FORCE_RUN_ALL Flag
        run: |
          echo "FORCE_RUN_ALL=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}" >> "$GITHUB_ENV"

      # The script is expected to set the `matrix` step output to a JSON array
      # of model entries; the build job reads name, type, version, runtime and
      # tag from each entry.
      # PR_BRANCH is handed over through `env:` rather than expanded into the
      # script text, because the branch name is attacker-controlled on pull
      # requests. FORCE_RUN_ALL is already exported via GITHUB_ENV above.
      - name: Determine Affected Models
        id: affected_models
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
        run: |
          python3 .github/workflows/kind-cluster/determine_models.py

      # The matrix is JSON and contains double quotes; reading it from an
      # environment variable keeps the shell quoting intact.
      - name: Print Determined Models
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from affected_models: $MATRIX"

      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
            echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi

  # Runs once per matrix entry on the self-hosted 1ES pool: prepares the
  # agent (tooling, Docker data root, kind cluster) and launches the build
  # script unless the image tag is already present in the registry.
  build-models:
    needs: determine-models
    if: needs.determine-models.outputs.is_matrix_empty == 'false'
    runs-on: ["self-hosted", "1ES.Pool=1es-aks-kaito-image-build-agent-pool-ubuntu"]
    environment: preset-env
    strategy:
      fail-fast: false
      matrix:
        model: ${{ fromJson(needs.determine-models.outputs.matrix) }}
      max-parallel: 3
    steps:
      - name: Checkout
        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29  # v4.1.6
        with:
          submodules: true
          fetch-depth: 0

      - name: Install Azure CLI latest
        run: |
          if ! which az > /dev/null; then
            echo "Azure CLI not found. Installing..."
            curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
          else
            echo "Azure CLI already installed."
          fi

      - name: Ensure Docker is Installed
        run: |
          # Add Docker's official GPG key:
          sudo apt-get update
          sudo apt-get install ca-certificates curl -y
          sudo install -m 0755 -d /etc/apt/keyrings
          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
          sudo chmod a+r /etc/apt/keyrings/docker.asc
          # Add the repository to Apt sources:
          echo \
            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
          sudo apt-get update
          sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y
          # User Permissions
          sudo usermod -aG docker $(whoami)
          sudo systemctl restart docker
          # Group membership added above does not apply to the current
          # session, so the socket is opened up for this run.
          sudo chmod 666 /var/run/docker.sock

      - name: Test Docker Access
        run: |
          ls -l /var/run/docker.sock
          docker run hello-world

      # Fails the job early when the data disk is not mounted.
      - name: List All Disks
        run: |
          lsblk
          if ! mountpoint -q /mnt/storage; then
            echo "Failed to find required storage partition /mnt/storage"
            exit 1
          fi

      - name: Check Available Disk Space
        run: |
          echo "Initial disk usage:"
          df -h
          # Remove unused Docker resources
          docker system prune -a -f --volumes
          # Check Docker-related disk usage
          echo "Docker-related disk usage after cleanup:"
          docker system df
          # Check final disk usage
          echo "Final disk usage:"
          df -h

      - name: Ensure Python is Installed
        run: |
          if ! command -v python3 &> /dev/null; then
            sudo apt-get update
            sudo apt-get install -y python3
          fi

      - name: Ensure git and git LFS is Installed
        run: |
          if ! command -v git &> /dev/null; then
            sudo apt-get update
            sudo apt-get install -y git
          fi
          if ! git lfs --version &> /dev/null; then
            sudo apt-get update
            curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
            sudo apt-get install -y git-lfs
            git lfs install
          fi

      # Picks the binary matching the agent's CPU, in line with the Docker
      # and kind steps, instead of always downloading the amd64 build.
      - name: Ensure kubectl is Installed
        run: |
          if ! command -v kubectl &> /dev/null; then
            case "$(uname -m)" in
              x86_64) KUBECTL_ARCH=amd64 ;;
              aarch64) KUBECTL_ARCH=arm64 ;;
              *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;;
            esac
            curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${KUBECTL_ARCH}/kubectl"
            sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
            kubectl version --client --output=yaml
          fi

      # An unsupported architecture now stops with a clear message instead of
      # failing later on a file that was never downloaded.
      - name: Ensure Kind is Installed
        run: |
          if ! command -v kind &> /dev/null; then
            case "$(uname -m)" in
              x86_64) KIND_ARCH=amd64 ;;
              aarch64) KIND_ARCH=arm64 ;;
              *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;;
            esac
            curl -Lo ./kind "https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-${KIND_ARCH}"
            chmod +x ./kind
            sudo mv ./kind /usr/local/bin/kind
          fi

      # Moves Docker's data root onto the data disk. Note that daemon.json is
      # overwritten, not merged.
      - name: Configure Docker to Use /mnt/storage/docker
        run: |
          sudo systemctl stop docker
          sudo mkdir -p /mnt/storage/docker
          # Move existing Docker data
          if [ -d /var/lib/docker ]; then
            sudo rsync -aP /var/lib/docker/ /mnt/storage/docker/
          fi
          echo '{"data-root":"/mnt/storage/docker"}' | sudo tee /etc/docker/daemon.json
          sudo systemctl start docker

      - name: Verify Docker Configuration
        run: |
          docker info | grep "Docker Root Dir"

      # Secrets are read from the environment so their contents are never
      # parsed as shell syntax.
      - name: Authenticate to ACR
        env:
          ACR_NAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
        run: |
          az login --identity
          az acr login -n "$ACR_NAME"

      # Re-exports the registry credentials as step outputs for later steps.
      # The registry name and the username come from the same secret.
      - name: 'Get ACR Info'
        id: acr_info
        env:
          ACR_NAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
          ACR_USERNAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
          ACR_PASSWORD: ${{ secrets.PROD_1ES_ACR_PASSWORD }}
        run: |
          {
            echo "ACR_NAME=$ACR_NAME"
            echo "ACR_USERNAME=$ACR_USERNAME"
            echo "ACR_PASSWORD=$ACR_PASSWORD"
          } >> "$GITHUB_OUTPUT"

      # Sets IMAGE_EXISTS to true only when the exact tag is listed for the
      # repository.
      # NOTE(review): the step name says "Test ACR" but the registry comes
      # from the PROD_1ES secrets — confirm which is intended.
      - name: 'Check if Image exists in Test ACR'
        id: check_test_image
        env:
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_NAME }}
          IMAGE_NAME: staging/aks/kaito/kaito-${{ matrix.model.name }}
          TAG: ${{ matrix.model.tag }}
        run: |
          # Use '|| true' to prevent script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)
          if [[ -z "$TAGS" ]]; then
            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
          # -F -x: compare the tag as a literal whole line; a regex match
          # would let the dots in a version such as 0.0.1 match any character.
          elif grep -Fxq -- "$TAG" <<< "$TAGS"; then
            echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
          else
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
            echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
          fi

      - name: Set Permissions for DataDrive /mnt/storage
        run: |
          sudo chown -R $(whoami) /mnt/storage
          sudo chmod -R 775 /mnt/storage
          sudo apt-get update
          sudo apt-get install acl -y
          sudo setfacl -dm u::rwx /mnt/storage  # Default user permissions
          sudo setfacl -dm g::rwx /mnt/storage  # Default group permissions
          sudo setfacl -dm o::rx /mnt/storage  # Default others permissions

      - name: Check and Create Kind Cluster
        run: |
          if ! kind get clusters | grep -q kind; then
            echo "Creating directory for etcd storage"
            sudo mkdir -p /mnt/storage/etcd
            echo "Creating Kind cluster using kind-1es.yaml"
            kind create cluster --config .github/workflows/kind-cluster/kind-1es.yaml
          else
            echo "Kind cluster already exists"
          fi

      # All inputs reach the script as environment variables; nothing is
      # expanded into the shell command itself. WEIGHTS_DIR is inherited from
      # the workflow-level env.
      - name: Launch Python Script to Kickoff Build Jobs
        if: steps.check_test_image.outputs.IMAGE_EXISTS == 'false'
        id: launch_script
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_NAME }}
          ACR_USERNAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          ACR_PASSWORD: ${{ steps.acr_info.outputs.ACR_PASSWORD }}
          MODEL_NAME: ${{ matrix.model.name }}
          MODEL_TYPE: ${{ matrix.model.type }}
          MODEL_VERSION: ${{ matrix.model.version }}
          MODEL_RUNTIME: ${{ matrix.model.runtime }}
          MODEL_TAG: ${{ matrix.model.tag }}
        run: |
          python3 .github/workflows/kind-cluster/main.py

      # Check the exit status of the Python script. Runs even after a failure
      # so that a skipped or failed launch is reported explicitly.
      - name: Check Python Script Status
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          LAUNCH_OUTCOME: ${{ steps.launch_script.outcome }}
        run: |
          if [[ "$IMAGE_EXISTS" == "true" ]]; then
            echo "Image already exists; skipping the status step."
          elif [[ "$LAUNCH_OUTCOME" != "success" ]]; then
            echo "Python script failed to execute successfully."
            exit 1  # Fail the job due to script failure
          else
            echo "Python script executed successfully."
          fi

      # Cleanup Resources: deletes the Kubernetes jobs named
      # docker-build-job-<model>-<digit>... when a build was attempted.
      - name: Cleanup
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          MODEL_NAME: ${{ matrix.model.name }}
        run: |
          if [[ "$IMAGE_EXISTS" == "false" ]]; then
            kubectl get job --no-headers -o custom-columns=":metadata.name" \
              | grep "^docker-build-job-${MODEL_NAME}-[0-9]" \
              | xargs -r kubectl delete job
          fi