# tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml

##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
# CI trigger for pushes to the listed branches; changes under the excluded
# paths are filtered out. This section is generated: change
# set-trigger-rules.py rather than editing it here.
trigger:
  branches:
    include:
    - main
    - rel-*
  paths:
    # Documentation-only and JS/web-only paths are excluded.
    exclude:
    - docs/**
    - README.md
    - CONTRIBUTING.md
    - BUILD.md
    - 'js/web'
    - 'onnxruntime/core/providers/js'
# Pull-request trigger for PRs targeting the listed branches, with the same
# path exclusions as the CI trigger. This section is generated: change
# set-trigger-rules.py rather than editing it here.
pr:
  branches:
    include:
    - main
    - rel-*
  paths:
    # Documentation-only and JS/web-only paths are excluded.
    exclude:
    - docs/**
    - README.md
    - CONTRIBUTING.md
    - BUILD.md
    - 'js/web'
    - 'onnxruntime/core/providers/js'
#### end trigger ####

# reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
# Queue-time parameters. Both values are forwarded unchanged to
# templates/flex-downloadPipelineArtifact.yml in the test stages below.
parameters:
# Passed to the download template as SpecificArtifact.
- name: specificArtifact
  type: boolean
  default: false
  displayName: 'Use Specific Artifact'
# Passed to the download template as BuildId.
- name: BuildId
  type: number
  default: 0
  displayName: "Specific Artifact's RunId"

# External GitHub repositories made available to this pipeline.
resources:
  repositories:
  # pypa/manylinux, pinned to a fixed commit.
  - repository: manylinux
    name: pypa/manylinux
    type: Github
    endpoint: Microsoft
    ref: '5eda9aded5462201e6310105728d33016e637ea7'

  # Microsoft/Llama-2-Onnx, tracking the main branch.
  # NOTE(review): no step in this file checks this repository out; confirm
  # whether a template still needs it.
  - repository: LLaMa2Onnx
    name: Microsoft/Llama-2-Onnx
    type: Github
    endpoint: Microsoft
    ref: main

# Pipeline-wide variables.
variables:
# Shared variables; presumably the source of common_cuda_version, which the
# build step reads (template not shown here; confirm).
- template: templates/common-variables.yml
# Passed to the docker image build as --build-arg BASEIMAGE.
- name: docker_base_image
  value: 'nvidia/cuda:11.8.0-cudnn8-devel-ubi8'
# Passed to the docker image build as --build-arg TRT_VERSION.
- name: linux_trt_version
  value: '8.6.1.6-1.cuda11.8'

stages:
# Stage: builds ONNX Runtime with CUDA inside a docker container and publishes
# the resulting Python wheel(s) as the 'drop-ort-linux-gpu' pipeline artifact,
# which the test stages below download.
- stage: Build_Onnxruntime_Cuda
  jobs:
  - job: Linux_Build
    timeoutInMinutes: 120
    variables:
      skipComponentGovernanceDetection: true
      # Host directory holding ccache data. It is saved/restored by the Cache@2
      # step and mounted into the build container as /cache.
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
    workspace:
      clean: all
    pool: onnxruntime-Ubuntu2204-AMD-CPU
    steps:
    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
      displayName: 'Clean Agent Directories'
      condition: always()

    - checkout: self
      clean: true
      submodules: none

    # Prepares the 'onnxruntimecuda11build' image that the build step runs.
    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
        Context: tools/ci_build/github/linux/docker
        DockerBuildArgs: "
        --network=host
        --build-arg BASEIMAGE=$(docker_base_image)
        --build-arg TRT_VERSION=$(linux_trt_version)
        --build-arg BUILD_UID=$( id -u )
        "
        Repository: onnxruntimecuda11build

    # Restore ccache data keyed by branch and commit, falling back to the
    # branch and then to any 'ccache' entry. The hit state is stored in
    # CACHE_RESTORED.
    - task: Cache@2
      inputs:
        key: '"ccache" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
        path: $(CCACHE_DIR)
        restoreKeys: |
          "ccache" | "$(Build.SourceBranch)"
          "ccache"
        cacheHitVar: CACHE_RESTORED
      displayName: Cache Task

    # Skipped only when CACHE_RESTORED is 'true'; otherwise make sure the
    # directory exists before it is mounted into the container.
    # NOTE(review): created with sudo, so it may be root-owned; confirm the
    # container user can write to it.
    - script: |
        sudo mkdir -p $(CCACHE_DIR)
      condition: ne(variables.CACHE_RESTORED, 'true')
      displayName: Create Cache Dir

    # Runs build.py inside the container: Release config, Ninja generator,
    # shared lib, Python wheel, Java, and CUDA for architectures 75 and 86.
    # ccache statistics are printed before and after the build, then reset.
    - task: CmdLine@2
      displayName: 'Build ONNX Runtime with CUDA in docker'
      inputs:
        script: |
          mkdir -p $HOME/.onnx
          docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
            --volume /data/onnx:/data/onnx:ro \
            --volume $(Build.SourcesDirectory):/onnxruntime_src \
            --volume $(Build.BinariesDirectory):/build \
            --volume /data/models:/build/models:ro \
            --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
            --volume $(Pipeline.Workspace)/ccache:/cache \
            -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
            -e NIGHTLY_BUILD \
            -e BUILD_BUILDNUMBER \
            -e CCACHE_DIR=/cache \
            onnxruntimecuda11build \
            /bin/bash -c "
              set -ex; \
              env; \
              ccache -s; \
              /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                --build_dir /build --cmake_generator Ninja \
                --config Release --update --build \
                --skip_submodule_sync \
                --build_shared_lib \
                --parallel \
                --build_wheel \
                --enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
                --enable_cuda_profiling --enable_cuda_nhwc_ops \
                --enable_pybind --build_java \
                --use_cache \
                --cmake_extra_defines  'CMAKE_CUDA_ARCHITECTURES=75;86' ; \
                ccache -sv; \
                ccache -z"
        workingDirectory: $(Build.SourcesDirectory)

    # Prunes the build output: removes the onnxruntime and pybind11 trees and
    # the models entry, deletes everything under _deps except onnx-src, then
    # records the remaining executable files in perms.txt.
    - task: CmdLine@2
      displayName: 'Clean up build directory'
      inputs:
        script: |
          rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
          rm -f $(Build.BinariesDirectory)/Release/models
          find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
          cd $(Build.BinariesDirectory)/Release
          find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt

    # Stage only the wheel files for publishing.
    - script: |
        set -ex
        mkdir -p $(Agent.TempDirectory)/ort
        cp $(Build.BinariesDirectory)/Release/dist/*.whl $(Agent.TempDirectory)/ort/
      displayName: 'Copy Wheels'

    - task: PublishPipelineArtifact@0
      displayName: 'Publish Pipeline Artifact'
      inputs:
        artifactName: 'drop-ort-linux-gpu'
        targetPath: '$(Agent.TempDirectory)/ort'

    - template: templates/explicitly-defined-final-tasks.yml

# Stage: installs the wheel built by Build_Onnxruntime_Cuda into an NVIDIA
# PyTorch container, runs the stable diffusion text-to-image demo, publishes
# the generated image, and passes it to check_image.py together with a
# reference image.
- stage: Stable_Diffusion
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Stable_Diffusion
    variables:
      skipComponentGovernanceDetection: true
      # Host directories mounted into the containers below.
      CLIP_MODEL_CACHE: $(Agent.TempDirectory)/clip_cache
      STABLE_DIFFUSION_MODEL_CACHE: $(Agent.TempDirectory)/stablediffusion_cache
      GenerateImage_DIR: $(Agent.TempDirectory)/images
    workspace:
      clean: all
    pool: onnxruntime-Linux-GPU-A10-12G
    steps:
    - checkout: self
      clean: true
      submodules: none

    # Fetch the wheel artifact; the demo step mounts this directory at /Release.
    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/Release'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}

    # Cache the model directory, keyed on pipeline_stable_diffusion.py.
    - task: Cache@2
      inputs:
        key: stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
        restoreKeys: |
          stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
          stable_diffusion
        path: $(STABLE_DIFFUSION_MODEL_CACHE)
      displayName: Cache stable diffusion model

    # Runs demo_txt2img.py with a fixed seed, using /model_cache as the
    # framework model directory, then copies every PNG found under ORT_CUDA
    # into /images (the host GenerateImage_DIR).
    - script: |
        mkdir -p $(GenerateImage_DIR)
        docker run --rm --gpus all -v $PWD:/workspace \
          -v $(Build.BinariesDirectory)/Release:/Release \
          -v $(STABLE_DIFFUSION_MODEL_CACHE):/model_cache:rw \
          -v $(GenerateImage_DIR):/images:rw \
          nvcr.io/nvidia/pytorch:22.11-py3 \
          bash -c ' \
            set -ex; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            python3 -m pip install /Release/*.whl; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
            python3 -m pip install -r requirements-cuda11.txt; \
            python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \
            echo Generate an image guided by a text prompt; \
            python3 demo_txt2img.py --framework-model-dir /model_cache --seed 1 --deterministic "astronaut riding a horse on mars" ; \
            find $(pwd)/ORT_CUDA -name "*.png" -exec cp {} /images/ \; ; \
            popd ; \
            '
      displayName: 'Run stable diffusion demo'
      workingDirectory: $(Build.SourcesDirectory)

    # Publish the generated image so its appearance can be checked by hand
    # when verifying a run.
    - task: PublishPipelineArtifact@0
      displayName: 'Publish generated image'
      inputs:
        artifactName: "Generated-Image"
        targetPath: '$(GenerateImage_DIR)'

    # Cache the model directory used by check_image.py, keyed on that script.
    - task: Cache@2
      inputs:
        key: clip_model | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py
        restoreKeys: |
          clip_model | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py
          clip_model
        path: $(CLIP_MODEL_CACHE)
      displayName: Cache clip model

    # Locates the generated astronaut_riding_a_h*.png in the source tree and
    # passes it to check_image.py as --image2, alongside the reference image
    # astronaut_riding_txt2image-DDIM-50.png as --image1.
    - script: |
        docker run --rm --gpus all -v $PWD:/workspace \
          -v $(CLIP_MODEL_CACHE):/model_cache:rw  \
          nvcr.io/nvidia/pytorch:22.11-py3 \
          bash -c '
            set -ex; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion/; \
            image2=$(find $(pwd) -name "astronaut_riding_a_h*.png") ; \
            pushd test; \
            python3 -m pip install -r requirements.txt; \
            echo check demo_txt2image.py generate image; \
            python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2 --cache_dir /model_cache ; \
            popd ; \
            popd ; \
            '
      displayName: 'Check the generated image'
      workingDirectory: $(Build.SourcesDirectory)

# Stage: installs the wheel built by Build_Onnxruntime_Cuda into a UBI8
# CUDA/TensorRT container and runs models.llama.convert_to_onnx on the
# downloaded Llama2 7B model with fp16 precision on the CUDA execution
# provider.
- stage: Llama2_ONNX_FP16
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Llama2_ONNX_FP16
    variables:
      skipComponentGovernanceDetection: true
    workspace:
      clean: all
    pool: Onnxruntime-Linux-A10-24G
    steps:
    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
      displayName: 'Clean Agent Directories'
      condition: always()

    - checkout: self
      clean: true
      submodules: none

    # Fetch the wheel artifact; the run step mounts this directory at
    # /ort-artifact.
    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}

    # Prepares the 'onnxruntimeubi8packagestest' image that the run step uses.
    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
        Context: tools/ci_build/github/linux/docker/
        ScriptName: tools/ci_build/get_docker_image.py
        DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
        Repository: onnxruntimeubi8packagestest
        UpdateDepsTxt: false

    # Download the model as a universal package; the run step mounts it at
    # /meta-llama2.
    - task: DownloadPackage@1
      displayName: 'Download Meta Llama2 model'
      inputs:
        packageType: upack
        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
        version: 1.0.0
        definition: '6fe0c4ed-9d0e-4d66-94cc-fb6a111d02a5'
        downloadPath: $(Agent.TempDirectory)/meta_llama2_7b_hf

    # Installs the llama requirements, the built wheel and a cu118 torch build,
    # then converts the model from /meta-llama2 into llama2-7b-fp16.
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
           -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
           -v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
           onnxruntimeubi8packagestest \
            bash -c "
              set -ex; \
              pushd /workspace/onnxruntime/python/tools/transformers/ ; \
              python3 -m pip install --upgrade pip ; \
              pushd models/llama ; \
              python3 -m pip install -r requirements-cuda.txt ; \
              popd ; \
              python3 -m pip install /ort-artifact/*.whl ; \
              python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
              python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --input /meta-llama2 --small_gpu ;\
              popd ; \
            "
      displayName: 'Run Llama2 to Onnx FP16 and parity Test'
      workingDirectory: $(Build.SourcesDirectory)