# refactor: cmake #42
name: ci

permissions:
  contents: read
  pull-requests: read
  actions: read

env:
  # Ref name (tag or branch) that triggered the run; consumed by the
  # release job below as the release tag.
  VERSION: "${{ github.ref_name }}"

# NOTE: "on" is a YAML 1.1 boolean-looking key; GitHub's workflow parser
# handles it correctly (suppress yamllint "truthy" here if linted).
on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"

# Cancel superseded runs: PRs group by head_ref/ref; other events fall
# back to run_id, i.e. they are never grouped (and never cancelled).
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # macOS build (Metal backend) for amd64 (macos-13) and arm64 (macos-14).
  darwin-metal:
    # Temporarily disabled. NOTE(review): the release job `needs` this job,
    # so while it is disabled the release job is skipped too — confirm intended.
    if: ${{ false }}
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64, arm64 ]
        version: [ '3.0' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://support.apple.com/en-us/102894.
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Setup Cache
        timeout-minutes: 5
        # v3 of actions/cache is deprecated; v4 keeps the same inputs.
        uses: actions/cache@v4
        with:
          key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Deps
        run: |
          brew update && brew install ccache cmake
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_ACCELERATE=on -DLLAMA_METAL=on -DLLAMA_METAL_EMBED_LIBRARY=on \
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j 4
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool -L ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
linux-hip: | |
if: ${{ false }} | |
strategy: | |
fail-fast: false | |
matrix: | |
arch: [ amd64 ] | |
# see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags. | |
# 6.1 ==> 6.1.2 | |
# 5.7 ==> 5.7.1 | |
version: [ '6.1', '5.7' ] | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Space | |
# see https://github.com/easimon/maximize-build-space/blob/master/action.yml. | |
run: | | |
sudo rm -rf /usr/share/dotnet | |
sudo rm -rf /usr/local/lib/android | |
sudo rm -rf /opt/ghc | |
sudo rm -rf /opt/hostedtoolcache/CodeQL | |
sudo docker image prune --all --force | |
- name: Clone | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v3 | |
with: | |
key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v7.0.0 | |
platforms: "arm64" | |
- name: Build | |
# disable OpenMP, | |
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, | |
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. | |
# build fat binary, | |
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, | |
# https://llvm.org/docs/AMDGPUUsage.html. | |
env: | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
apt-get update && apt-get install -y build-essential git cmake ccache | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \ | |
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \ | |
-DLLAMA_OPENMP=off | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j 4 | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
echo "===== BUILD =====" | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env CC=/opt/rocm/llvm/bin/clang \ | |
--env CXX=/opt/rocm/llvm/bin/clang++ \ | |
--env CCACHE_DIR \ | |
--env AMDGPU_TARGETS \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
rocm/dev-ubuntu-22.04:${{ matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete | |
echo "===== RESULT =====" | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd ${{ github.workspace }}/build/bin/llama-box | |
else | |
exit 1 | |
fi | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/* | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }} | |
linux-cuda: | |
if: ${{ false }} | |
strategy: | |
fail-fast: false | |
matrix: | |
arch: [ amd64 ] | |
# see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel. | |
# 12.5 ==> 12.5.0 | |
# 11.7 ==> 11.7.1 | |
version: [ '12.5', '11.7' ] | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Maximize Space | |
# see https://github.com/easimon/maximize-build-space/blob/master/action.yml. | |
run: | | |
sudo rm -rf /usr/share/dotnet | |
sudo rm -rf /usr/local/lib/android | |
sudo rm -rf /opt/ghc | |
sudo rm -rf /opt/hostedtoolcache/CodeQL | |
sudo docker image prune --all --force | |
- name: Clone | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v3 | |
with: | |
key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Setup QEMU | |
if: ${{ matrix.arch == 'arm64' }} | |
uses: docker/setup-qemu-action@v3 | |
with: | |
image: tonistiigi/binfmt:qemu-v7.0.0 | |
platforms: "arm64" | |
- name: Build | |
# disable OpenMP, | |
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, | |
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. | |
# build fat binary, | |
# see https://developer.nvidia.com/cuda-gpus. | |
env: | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}" | |
run: | | |
echo "===== SCRIPT =====" | |
cat <<EOF > /tmp/entrypoint.sh | |
#!/bin/bash | |
apt-get update && apt-get install -y build-essential git cmake ccache | |
git config --system --add safe.directory '*' | |
mkdir -p ${{ github.workspace }}/.cache | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ | |
-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \ | |
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \ | |
-DLLAMA_OPENMP=off | |
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j 4 | |
EOF | |
chmod +x /tmp/entrypoint.sh | |
cat /tmp/entrypoint.sh | |
echo "===== BUILD =====" | |
docker run \ | |
--rm \ | |
--privileged \ | |
--platform linux/${{ matrix.arch }} \ | |
--volume ${{ github.workspace }}:${{ github.workspace }} \ | |
--workdir ${{ github.workspace }} \ | |
--env CCACHE_DIR \ | |
--env CUDA_ARCHITECTURES \ | |
--volume /tmp/entrypoint.sh:/entrypoint.sh \ | |
--entrypoint /entrypoint.sh \ | |
nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}-devel-ubuntu22.04 | |
echo "===== RESULT =====" | |
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then | |
ldd ${{ github.workspace }}/build/bin/llama-box | |
else | |
exit 1 | |
fi | |
echo "===== PACKAGE =====" | |
mkdir -p ${{ github.workspace }}/out | |
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/* | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }} | |
windows-hip: | |
strategy: | |
fail-fast: false | |
matrix: | |
arch: [ amd64 ] | |
# see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html. | |
# 5.7 ==> 5.7.1 | |
# 5.5 ==> 5.5.1 | |
version: [ '5.7', '5.5' ] | |
runs-on: windows-2022 | |
steps: | |
- name: Clone | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v3 | |
with: | |
key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} | |
path: | | |
${{ github.workspace }}\.cache | |
${{ github.workspace }}\tmp | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$WarningPreference = 'SilentlyContinue' | |
$VerbosePreference = 'SilentlyContinue' | |
$DebugPreference = 'SilentlyContinue' | |
$ProgressPreference = 'SilentlyContinue' | |
choco install cmake make ccache | |
- name: Setup HIP SDK | |
run: | | |
$ErrorActionPreference = "Stop" | |
$WarningPreference = 'SilentlyContinue' | |
$VerbosePreference = 'SilentlyContinue' | |
$DebugPreference = 'SilentlyContinue' | |
$ProgressPreference = 'SilentlyContinue' | |
Write-Host "I install AMD ROCm HIP SDK" | |
if (-not (Test-Path -Path "${{ github.workspace }}\tmp\rocm-install.exe" -PathType Leaf)) { | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\tmp" -ErrorAction Ignore | Out-Null | |
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" | |
Move-Item -Path "${env:RUNNER_TEMP}\rocm-install.exe" -Destination "${{ github.workspace }}\tmp\rocm-install.exe" | |
} | |
Start-Process "${{ github.workspace }}\tmp\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait | |
Write-Host "I verify AMD ROCm HIP SDK" | |
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version | |
- name: Build | |
# disable OpenMP, | |
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, | |
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. | |
# build fat binary, | |
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, | |
# https://llvm.org/docs/AMDGPUUsage.html. | |
env: | |
CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache" | |
AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" | |
run: | | |
Write-Host "===== BUILD =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null | |
$env:HIP_PATH = $(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path) | |
$env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` | |
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` | |
-DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" ` | |
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} ` | |
-DLLAMA_OPENMP=off | |
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j 4 | |
Write-Host "===== RESULT =====" | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { | |
ldd.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}\\out\\*.zip | |
name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }} | |
windows-cuda: | |
if: ${{ false }} | |
strategy: | |
fail-fast: false | |
matrix: | |
arch: [ amd64 ] | |
# see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network. | |
# 12.5 ==> 12.5.0 | |
# 11.7 ==> 11.7.1 | |
version: [ '12.5', '11.7' ] | |
runs-on: windows-2022 | |
steps: | |
- name: Clone | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: 'recursive' | |
- name: Setup Cache | |
timeout-minutes: 5 | |
uses: actions/cache@v3 | |
with: | |
key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} | |
path: | | |
${{ github.workspace }}/.cache | |
- name: Deps | |
run: | | |
$ErrorActionPreference = "Stop" | |
$WarningPreference = 'SilentlyContinue' | |
$VerbosePreference = 'SilentlyContinue' | |
$DebugPreference = 'SilentlyContinue' | |
$ProgressPreference = 'SilentlyContinue' | |
choco install cmake make ccache | |
- name: Setup CUDA toolkit | |
id: cuda-toolkit | |
uses: Jimver/[email protected] | |
with: | |
cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }} | |
method: 'network' | |
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]' | |
- name: Build | |
# disable OpenMP, | |
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, | |
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. | |
# build fat binary, | |
# see https://developer.nvidia.com/cuda-gpus. | |
env: | |
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" | |
CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}" | |
run: | | |
Write-Host "===== BUILD =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null | |
$env:CMAKE_PREFIX_PATH = "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}" | |
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` | |
-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" ` | |
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} ` | |
-DLLAMA_OPENMP=off | |
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j 4 | |
Write-Host "===== RESULT =====" | |
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { | |
ldd.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" | |
} else { | |
exit 1 | |
} | |
Write-Host "===== PACKAGE =====" | |
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null | |
Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip" | |
- name: Upload Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out/*.zip | |
name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }} | |
release: | |
if: ${{ startsWith(github.ref, 'refs/tags/') }} | |
permissions: | |
contents: write | |
actions: read | |
id-token: write | |
runs-on: ubuntu-22.04 | |
needs: | |
- darwin-metal | |
- linux-hip | |
- linux-cuda | |
- windows-hip | |
- windows-cuda | |
steps: | |
- name: Download Artifact | |
uses: actions/download-artifact@v4 | |
with: | |
path: ${{ github.workspace }}/out | |
merge-multiple: true | |
- name: Release | |
uses: softprops/action-gh-release@v1 | |
with: | |
fail_on_unmatched_files: true | |
tag_name: "${{ env.VERSION }}" | |
prerelease: ${{ contains(github.ref, 'rc') }} | |
files: ${{ github.workspace }}/out/* |