# feat: Enhance CI/CD (Build, E2E, Composite Action) with 1ES Migration and Phi-3 Integration #4
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Build and Push Preset Models 1ES

# One active run per workflow and PR head branch (falls back to the run id when
# there is no head ref, so such runs never share a group). A newer run in the
# same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Triggers: pull requests targeting main and pushes to main that touch the
# preset inference code or the supported-models list, plus manual dispatch.
on:
  pull_request:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  push:
    branches:
      - main
    paths:
      - 'presets/inference/**'
      - 'presets/models/supported_models.yaml'
  workflow_dispatch:
    inputs:
      # Read by the "Set FORCE_RUN_ALL Flag" step of the determine-models job.
      force-run-all:
        type: boolean
        default: false
        description: "Run all models for build"
# Workflow-level environment, visible to every step of every job.
env:
  # Quoted so the version stays a string rather than a float.
  GO_VERSION: "1.22"
  # PR head branch when available, otherwise the ref that triggered the run.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Passed to the build script as WEIGHTS_DIR; same path the setup job checks
  # for a mount point.
  WEIGHTS_DIR: "/mnt/storage"
# Token permissions granted to every job in this workflow.
permissions:
  id-token: write
  # NOTE(review): no step in this workflow appears to push commits, tags or
  # releases — confirm whether `contents: read` would be sufficient.
  contents: write
jobs:
  # Checks that the data disk is mounted and installs the CLI tooling
  # (python3, kubectl, Docker, kind) on a build-pool agent.
  # NOTE(review): build-models is a separate job on the same pool; whether it
  # runs on the agent prepared here depends on the pool configuration — confirm.
  setup:
    runs-on: [ "self-hosted", "1ES.Pool=1es-aks-kaito-image-build-agent-pool-ubuntu" ]
    environment: preset-env
    steps:
      - name: List All Disks
        run: |
          lsblk
          if ! mountpoint -q /mnt/storage; then
            echo "Failed to find required storage partition /mnt/storage"
            exit 1
          fi

      - name: Check Available Disk Space
        run: df -h

      - name: Ensure Python is Installed
        run: |
          if ! command -v python3 &> /dev/null; then
            sudo apt-get update
            sudo apt-get install -y python3
          fi

      - name: Ensure kubectl is Installed
        run: |
          if ! command -v kubectl &> /dev/null; then
            curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
            sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
            kubectl version --client --output=yaml
          fi

      - name: Ensure Docker is Installed
        run: |
          # Add Docker's official GPG key:
          sudo apt-get update
          sudo apt-get install ca-certificates curl -y
          sudo install -m 0755 -d /etc/apt/keyrings
          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
          sudo chmod a+r /etc/apt/keyrings/docker.asc
          # Add the repository to Apt sources:
          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo "$VERSION_CODENAME") stable" |
            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
          sudo apt-get update
          sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y
          # User Permissions
          sudo usermod -aG docker "$(whoami)"
          sudo systemctl restart docker
          # Opens the socket to every local user so the next step can reach
          # the daemon.
          sudo chmod 666 /var/run/docker.sock

      - name: Test Docker Access
        run: |
          ls -l /var/run/docker.sock
          docker run hello-world

      - name: Ensure Kind is Installed
        run: |
          if ! command -v kind &> /dev/null; then
            # Fail loudly on an unknown architecture instead of skipping the
            # download and tripping over a missing ./kind afterwards.
            case "$(uname -m)" in
              x86_64) kind_arch=amd64 ;;
              aarch64) kind_arch=arm64 ;;
              *)
                echo "Unsupported architecture for kind: $(uname -m)"
                exit 1
                ;;
            esac
            curl -Lo ./kind "https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-${kind_arch}"
            chmod +x ./kind
            sudo mv ./kind /usr/local/bin/kind
          fi

  # Computes the build matrix and exposes it, plus an "is it empty" flag,
  # as job outputs for build-models.
  determine-models:
    needs: setup
    runs-on: ubuntu-latest
    environment: preset-env
    outputs:
      matrix: ${{ steps.affected_models.outputs.matrix }}
      is_matrix_empty: ${{ steps.check_matrix_empty.outputs.is_empty }}
    steps:
      - name: Checkout
        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29  # v4.1.6
        with:
          submodules: true
          fetch-depth: 0

      # The expression renders to the literal text "true" or "false".
      - name: Set FORCE_RUN_ALL Flag
        run: |
          echo "FORCE_RUN_ALL=${{ github.event_name == 'workflow_dispatch' && github.event.inputs.force-run-all == 'true' }}" >> "$GITHUB_ENV"

      # This script should output a JSON array of model names.
      # The branch name is handed over through `env:` rather than pasted into
      # the script text, because github.head_ref is chosen by the PR author.
      - name: Determine Affected Models
        id: affected_models
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          FORCE_RUN_ALL: ${{ env.FORCE_RUN_ALL }}
        run: python3 .github/workflows/kind-cluster/determine_models.py

      - name: Print Determined Models
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          echo "Output from affected_models: $MATRIX"

      # The matrix is compared through an environment variable: interpolating
      # JSON straight into a double-quoted shell string lets its own quotes
      # terminate the string.
      - name: Check if Matrix is Empty
        id: check_matrix_empty
        env:
          MATRIX: ${{ steps.affected_models.outputs.matrix }}
        run: |
          if [ -z "$MATRIX" ] || [ "$MATRIX" = "[]" ]; then
            echo "is_empty=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_empty=false" >> "$GITHUB_OUTPUT"
          fi

  # Runs once per matrix entry (at most three in parallel): skips models whose
  # image tag is already in the registry, otherwise launches the build script
  # against a kind cluster and cleans up its Kubernetes jobs afterwards.
  build-models:
    needs: determine-models
    if: needs.determine-models.outputs.is_matrix_empty == 'false'
    runs-on: [ "self-hosted", "1ES.Pool=1es-aks-kaito-image-build-agent-pool-ubuntu" ]
    environment: preset-env
    strategy:
      fail-fast: false
      matrix:
        model: ${{ fromJson(needs.determine-models.outputs.matrix) }}
      max-parallel: 3
    steps:
      - name: Checkout
        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29  # v4.1.6
        with:
          submodules: true
          fetch-depth: 0

      - name: Install Azure CLI latest
        run: |
          if ! command -v az > /dev/null; then
            echo "Azure CLI not found. Installing..."
            curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
          else
            echo "Azure CLI already installed."
          fi

      # Secrets reach the shell as environment variables, never as script text.
      - name: Authenticate to ACR
        env:
          ACR_NAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
        run: |
          az login --identity
          az acr login -n "$ACR_NAME"

      # Registry name and username both come from the same secret. The
      # password is intentionally not written to the step outputs; the launch
      # step reads it from the secret directly.
      - name: Get ACR Info
        id: acr_info
        env:
          ACR_USERNAME: ${{ secrets.PROD_1ES_ACR_USERNAME }}
        run: |
          {
            echo "ACR_NAME=$ACR_USERNAME"
            echo "ACR_USERNAME=$ACR_USERNAME"
          } >> "$GITHUB_OUTPUT"

      - name: Check if Image exists in Test ACR
        id: check_test_image
        env:
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          IMAGE_NAME: staging/aks/kaito/kaito-${{ matrix.model.name }}
          TAG: ${{ matrix.model.tag }}
        run: |
          # Use '|| true' to prevent script from exiting with an error if the repository is not found
          TAGS=$(az acr repository show-tags -n "$ACR_NAME" --repository "$IMAGE_NAME" --output tsv || true)
          if [[ -z "$TAGS" ]]; then
            echo "Image $IMAGE_NAME:$TAG or repository not found in $ACR_NAME."
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
          # -F -x: whole-line, literal comparison, so the dots in a tag such
          # as 0.0.1 are not treated as regex wildcards.
          elif grep -Fxq -- "$TAG" <<< "$TAGS"; then
            echo "IMAGE_EXISTS=true" >> "$GITHUB_OUTPUT"
          else
            echo "IMAGE_EXISTS=false" >> "$GITHUB_OUTPUT"
            echo "Image $IMAGE_NAME:$TAG not found in $ACR_NAME."
          fi

      - name: Set Permissions for DataDrive /mnt/storage
        run: |
          sudo chown -R "$(whoami)" /mnt/storage
          sudo chmod -R 775 /mnt/storage
          sudo apt-get update
          sudo apt-get install acl -y
          sudo setfacl -dm u::rwx /mnt/storage  # Default user permissions
          sudo setfacl -dm g::rwx /mnt/storage  # Default group permissions
          sudo setfacl -dm o::rx /mnt/storage   # Default others permissions

      - name: Check and Create Kind Cluster
        run: |
          if ! kind get clusters | grep -q kind; then
            echo "Creating directory for etcd storage"
            sudo mkdir -p /mnt/storage/etcd
            echo "Creating Kind cluster using kind-1es.yaml"
            kind create cluster --config .github/workflows/kind-cluster/kind-1es.yaml
          else
            echo "Kind cluster already exists"
          fi

      # Every input is supplied through `env:` so branch names, model metadata
      # and credentials are never parsed by the shell.
      - name: Launch Python Script to Kickoff Build Jobs
        if: steps.check_test_image.outputs.IMAGE_EXISTS == 'false'
        id: launch_script
        env:
          PR_BRANCH: ${{ env.BRANCH_NAME }}
          ACR_NAME: ${{ steps.acr_info.outputs.ACR_NAME }}
          ACR_USERNAME: ${{ steps.acr_info.outputs.ACR_USERNAME }}
          ACR_PASSWORD: ${{ secrets.PROD_1ES_ACR_PASSWORD }}
          MODEL_NAME: ${{ matrix.model.name }}
          MODEL_TYPE: ${{ matrix.model.type }}
          MODEL_VERSION: ${{ matrix.model.version }}
          MODEL_RUNTIME: ${{ matrix.model.runtime }}
          MODEL_TAG: ${{ matrix.model.tag }}
          WEIGHTS_DIR: ${{ env.WEIGHTS_DIR }}
        run: python3 .github/workflows/kind-cluster/main.py

      # Check the exit status of the Python script
      - name: Check Python Script Status
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          LAUNCH_OUTCOME: ${{ steps.launch_script.outcome }}
        run: |
          if [[ "$IMAGE_EXISTS" == "true" ]]; then
            echo "Image already exists; skipping the status step."
          elif [[ "$LAUNCH_OUTCOME" != "success" ]]; then
            echo "Python script failed to execute successfully."
            exit 1  # Fail the job due to script failure
          else
            echo "Python script executed successfully."
          fi

      # Cleanup Resources: delete this model's docker-build jobs, but only
      # when a build was actually attempted.
      - name: Cleanup
        if: ${{ always() }}
        env:
          IMAGE_EXISTS: ${{ steps.check_test_image.outputs.IMAGE_EXISTS }}
          MODEL_NAME: ${{ matrix.model.name }}
        run: |
          if [[ "$IMAGE_EXISTS" == "false" ]]; then
            kubectl get job --no-headers -o custom-columns=":metadata.name" |
              grep "^docker-build-job-${MODEL_NAME}-[0-9]" |
              xargs -r kubectl delete job
          fi