ci: support windows oneapi #59
name: ci
permissions:
  contents: read
  pull-requests: read
  actions: read
env:
  VERSION: "${{ github.ref_name }}"
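  # on tag pushes, github.ref_name resolves to the tag (e.g. v1.2.3);
  # the release job below reuses VERSION as the release tag.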
on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
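  # for pull requests, github.head_ref is set, so all runs of one PR share a
  # group and superseded runs get cancelled; pushes and manual dispatches fall
  # back to the unique run id and are never cancelled.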
jobs:
  # cacheable build with ccache.
  darwin-metal:
    if: ${{ false }}
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64, arm64 ]
        version: [ '3.0' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://support.apple.com/en-us/102894.
    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
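    # macos-13 is the last Intel (amd64) runner image; macos-14 runs on Apple silicon (arm64).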
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup XCode
        uses: maxim-lobanov/setup-xcode@v1
        with:
          xcode-version: '15.2'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
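      # CCACHE_DIR (set in the Build step below) points inside this cached
      # directory, so ccache's compiler cache survives across runs and warms rebuilds.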
      - name: Deps
        run: |
          brew update && brew install ccache
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== BUILD ====="
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_ACCELERATE=on -DLLAMA_METAL=on -DLLAMA_METAL_EMBED_LIBRARY=on \
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            otool -L ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
  # cacheable build with ccache.
  linux-hip:
    if: ${{ false }}
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags.
        # 6.1 ==> 6.1.2
        # 5.7 ==> 5.7.1
        version: [ '6.1', '5.7' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
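      # the build runs inside the matching rocm/dev-ubuntu container: an
      # entrypoint script is written via a heredoc (workflow expressions and
      # step env vars are expanded on the host when the script is written),
      # and the checked-out workspace is bind-mounted into the container.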
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html,
        # https://rocm.docs.amd.com/en/docs-5.7.1/release/gpu_os_support.html.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
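          # each gfx target listed here gets its own code object in the fat
          # binary, trading longer compiles and a bigger binary for wide GPU coverage.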
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=/opt/rocm/llvm/bin/clang \
            --env CXX=/opt/rocm/llvm/bin/clang++ \
            --env CCACHE_DIR \
            --env AMDGPU_TARGETS \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            rocm/dev-ubuntu-22.04:${{ matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
  # cacheable build with ccache.
  linux-cuda:
    if: ${{ false }}
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
        # 12.5 ==> 12.5.0
        # 11.7 ==> 11.7.1
        version: [ '12.5', '11.7' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
          CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}"
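          # sm_89 (Ada) and sm_90 (Hopper) require CUDA 11.8 or newer,
          # so they are only included for the 12.x toolkit.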
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CCACHE_DIR \
            --env CUDA_ARCHITECTURES \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}-devel-ubuntu22.04
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
  # cacheable build with ccache.
  linux-oneapi:
    if: ${{ false }}
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
        # 2024.2 ==> 2024.2.0
        # 2024.1 ==> 2024.1.1
        version: [ '2024.2', '2024.1' ]
    runs-on: ubuntu-22.04
    steps:
      - name: Maximize Space
        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}/.cache
      - name: Setup QEMU
        if: ${{ matrix.arch == 'arm64' }}
        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:qemu-v7.0.0
          platforms: "arm64"
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        run: |
          echo "===== SCRIPT ====="
          cat <<EOF > /tmp/entrypoint.sh
          #!/bin/bash
          apt-get update && apt-get install -y build-essential git cmake ccache
          git config --system --add safe.directory '*'
          mkdir -p ${{ github.workspace }}/.cache
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_SYCL=on -DLLAMA_SYCL_F16=on \
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
          EOF
          chmod +x /tmp/entrypoint.sh
          cat /tmp/entrypoint.sh
          echo "===== BUILD ====="
          docker run \
            --rm \
            --privileged \
            --platform linux/${{ matrix.arch }} \
            --volume ${{ github.workspace }}:${{ github.workspace }} \
            --workdir ${{ github.workspace }} \
            --env CC=icx \
            --env CXX=icx \
            --env CCACHE_DIR \
            --volume /tmp/entrypoint.sh:/entrypoint.sh \
            --entrypoint /entrypoint.sh \
            intel/oneapi-basekit:${{ matrix.version == '2024.2' && '2024.2.0' || '2024.1.1' }}-devel-ubuntu22.04
          echo "===== RESULT ====="
          if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
            ldd ${{ github.workspace }}/build/bin/llama-box
          else
            exit 1
          fi
          echo "===== PACKAGE ====="
          mkdir -p ${{ github.workspace }}/out
          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}/out/*.zip
          name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}
  # cacheable build with ccache.
  windows-hip:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
        # 5.7 ==> 5.7.1
        # 5.5 ==> 5.5.1
        version: [ '5.7', '5.5' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
          path: |
            ${{ github.workspace }}\.cache
      - name: Setup HIP
        id: sdk
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "Installing AMD ROCm HIP SDK"
          Invoke-WebRequest -OutFile "${{ runner.temp }}\installer.exe" `
            -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-install'
          Write-Host "Verifying AMD ROCm HIP SDK"
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
          $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
          "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_OUTPUT -Append
          "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache -y
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
        # https://llvm.org/docs/AMDGPUUsage.html.
        # official gpu support list,
        # see https://rocm.docs.amd.com/en/docs-5.7.1/release/windows_support.html,
        # https://rocm.docs.amd.com/en/docs-5.5.1/release/windows_support.html.
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
          AMDGPU_TARGETS: "${{ matrix.version == '5.7' && 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' || 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' }}"
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "HIP_PATH=${env:HIP_PATH}"
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:HIP_PATH = "${{ steps.sdk.outputs.HIP_PATH }}"
          $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} `
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}
  # non-cacheable build (ccache does not work with MSVC here),
  # see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake.
  windows-cuda:
    if: ${{ false }}
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
        # 12.5 ==> 12.5.0
        # 11.7 ==> 11.7.1
        version: [ '12.5', '11.7' ]
    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
    # https://forums.developer.nvidia.com/t/problems-with-latest-vs2022-update/294150.
    runs-on: ${{ matrix.version == '12.5' && 'windows-2022' || 'windows-2019' }}
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup CUDA
        # ensure MSBuildExtensions has been configured,
        # see https://github.com/NVlabs/tiny-cuda-nn/issues/164#issuecomment-1280749170.
        id: sdk
        uses: Jimver/[email protected]
        with:
          cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        # build fat binary,
        # see https://developer.nvidia.com/cuda-gpus.
        env:
          CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}"
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "===== BUILD ====="
          Write-Host "CUDA_PATH=${{ steps.sdk.outputs.CUDA_PATH }}"
          $env:CUDA_PATH = "${{ steps.sdk.outputs.CUDA_PATH }}"
          cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} `
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- /m:${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}
  # non-cacheable build, inspired by
  # https://github.com/oneapi-src/oneapi-ci/blob/master/.github/workflows/build_all.yml.
  windows-oneapi:
    strategy:
      fail-fast: false
      matrix:
        arch: [ amd64 ]
        # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=offline.
        # 2024.2 ==> 2024.2.0
        # 2024.1 ==> 2024.1.1
        version: [ '2024.2' ]
    runs-on: windows-2022
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: 'recursive'
      - name: Setup Cache
        timeout-minutes: 5
        uses: actions/cache@v3
        with:
          key: oneapi_installer-windows-2022-${{ matrix.version == '2024.2' && '2024.2.0' || '2024.1.1' }}
          path: |
            ${{ runner.temp }}
      - name: Setup oneAPI
        id: sdk
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          Write-Host "Installing Intel oneAPI SDK"
          Invoke-WebRequest -OutFile "${{ runner.temp }}\installer.exe" `
            -Uri "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/${{ matrix.version == '2024.2' && 'e83a8e64-04fc-45df-85c6-c2208d03bdb5/w_BaseKit_p_2024.2.0.635' || '7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595' }}.exe"
          Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
            -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
          $oneapiRoot = "C:\Program Files (x86)\Intel\oneAPI"
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_OUTPUT -Append
          "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
      - name: Deps
        run: |
          $ErrorActionPreference = "Stop"
          $ProgressPreference = 'SilentlyContinue'
          choco install ccache -y
      - name: Build
        # disable OpenMP,
        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
        # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
        env:
          CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
        run: |
          Write-Host "===== BUILD ====="
          Write-Host "ONEAPI_ROOT=${{ steps.sdk.outputs.ONEAPI_ROOT }}"
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
          $env:ONEAPI_ROOT = "${{ steps.sdk.outputs.ONEAPI_ROOT }}"
          & "${env:ONEAPI_ROOT}\setvars.bat" intel64 --force
          cmake -G "MinGW Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
            -DCMAKE_C_COMPILER="icx" -DCMAKE_CXX_COMPILER="icx" `
            -DLLAMA_SYCL=on -DLLAMA_SYCL_F16=on `
            ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} `
            -DLLAMA_OPENMP=off
          cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
          Write-Host "===== RESULT ====="
          if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
            llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
          } else {
            exit 1
          }
          Write-Host "===== PACKAGE ====="
          New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
          Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
      - name: Upload Artifact
        uses: actions/upload-artifact@v4
        with:
          path: ${{ github.workspace }}\out\*.zip
          name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}
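  # publish only on tag pushes: artifacts from every build job are merged into
  # a single directory and attached to the GitHub release for the tag.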
  release:
    if: ${{ startsWith(github.ref, 'refs/tags/') }}
    permissions:
      contents: write
      actions: read
      id-token: write
    runs-on: ubuntu-22.04
    needs:
      - darwin-metal
      - linux-hip
      - linux-cuda
      - linux-oneapi
      - windows-hip
      - windows-cuda
      - windows-oneapi
    steps:
      - name: Download Artifact
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.workspace }}/out
          merge-multiple: true
      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          fail_on_unmatched_files: true
          tag_name: "${{ env.VERSION }}"
          prerelease: ${{ contains(github.ref, 'rc') }}
          files: ${{ github.workspace }}/out/*