# ci workflow — refactor: cmake (#45)
# NOTE(review): the lines above this file's YAML document were web-page chrome
# ("Skip to content", PR title, "Workflow file for this run") captured by a
# copy-paste from the GitHub UI; they are preserved here as comments so the
# document parses.
name: ci

# Least-privilege token for build jobs; the release job widens this locally.
permissions:
  contents: read
  pull-requests: read
  actions: read

env:
  # Tag name (e.g. v1.2.3) when triggered by a tag push; used by the release job.
  VERSION: "${{ github.ref_name }}"

on:
  workflow_dispatch: { }
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"

# One run per branch/PR head: new pushes cancel the in-flight run; falls back
# to run_id (never cancels) for tag pushes and manual dispatch.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
darwin-metal:
strategy:
fail-fast: false
matrix:
arch: [ amd64, arm64 ]
version: [ '3.0' ]
# see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
# https://support.apple.com/en-us/102894.
runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup XCode
uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: '15.2'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Deps
run: |
brew update && brew install ccache
- name: Build
# disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
env:
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
run: |
echo "===== BUILD ====="
mkdir -p ${{ github.workspace }}/.cache
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
-DLLAMA_ACCELERATE=on -DLLAMA_METAL=on -DLLAMA_METAL_EMBED_LIBRARY=on \
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
-DLLAMA_OPENMP=off
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j 4
echo "===== RESULT ====="
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
otool -L ${{ github.workspace }}/build/bin/llama-box
else
exit 1
fi
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/*
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
linux-hip:
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
# see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags.
# 6.1 ==> 6.1.2
# 5.7 ==> 5.7.1
version: [ '6.1', '5.7' ]
runs-on: ubuntu-22.04
steps:
- name: Maximize Space
# see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Setup QEMU
if: ${{ matrix.arch == 'arm64' }}
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0
platforms: "arm64"
- name: Build
# disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
# build fat binary,
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
# https://llvm.org/docs/AMDGPUUsage.html.
env:
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
run: |
echo "===== SCRIPT ====="
cat <<EOF > /tmp/entrypoint.sh
#!/bin/bash
apt-get update && apt-get install -y build-essential git cmake ccache
git config --system --add safe.directory '*'
mkdir -p ${{ github.workspace }}/.cache
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
-DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
-DLLAMA_OPENMP=off
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j 4
EOF
chmod +x /tmp/entrypoint.sh
cat /tmp/entrypoint.sh
echo "===== BUILD ====="
docker run \
--rm \
--privileged \
--platform linux/${{ matrix.arch }} \
--volume ${{ github.workspace }}:${{ github.workspace }} \
--workdir ${{ github.workspace }} \
--env CC=/opt/rocm/llvm/bin/clang \
--env CXX=/opt/rocm/llvm/bin/clang++ \
--env CCACHE_DIR \
--env AMDGPU_TARGETS \
--volume /tmp/entrypoint.sh:/entrypoint.sh \
--entrypoint /entrypoint.sh \
rocm/dev-ubuntu-22.04:${{ matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete
echo "===== RESULT ====="
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
ldd ${{ github.workspace }}/build/bin/llama-box
else
exit 1
fi
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
linux-cuda:
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
# see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
# 12.5 ==> 12.5.0
# 11.7 ==> 11.7.1
version: [ '12.5', '11.7' ]
runs-on: ubuntu-22.04
steps:
- name: Maximize Space
# see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Setup QEMU
if: ${{ matrix.arch == 'arm64' }}
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0
platforms: "arm64"
- name: Build
# disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
# build fat binary,
# see https://developer.nvidia.com/cuda-gpus.
env:
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}"
run: |
echo "===== SCRIPT ====="
cat <<EOF > /tmp/entrypoint.sh
#!/bin/bash
apt-get update && apt-get install -y build-essential git cmake ccache
git config --system --add safe.directory '*'
mkdir -p ${{ github.workspace }}/.cache
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \
-DLLAMA_OPENMP=off
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j 4
EOF
chmod +x /tmp/entrypoint.sh
cat /tmp/entrypoint.sh
echo "===== BUILD ====="
docker run \
--rm \
--privileged \
--platform linux/${{ matrix.arch }} \
--volume ${{ github.workspace }}:${{ github.workspace }} \
--workdir ${{ github.workspace }} \
--env CCACHE_DIR \
--env CUDA_ARCHITECTURES \
--volume /tmp/entrypoint.sh:/entrypoint.sh \
--entrypoint /entrypoint.sh \
nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}-devel-ubuntu22.04
echo "===== RESULT ====="
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
ldd ${{ github.workspace }}/build/bin/llama-box
else
exit 1
fi
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/*
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
windows-hip:
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
# see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
# 5.7 ==> 5.7.1
# 5.5 ==> 5.5.1
version: [ '5.7', '5.5' ]
runs-on: windows-2022
steps:
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}\.cache
${{ github.workspace }}\tmp
- name: Deps
run: |
$ErrorActionPreference = "Stop"
$WarningPreference = 'SilentlyContinue'
$VerbosePreference = 'SilentlyContinue'
$DebugPreference = 'SilentlyContinue'
$ProgressPreference = 'SilentlyContinue'
choco install ccache
- name: Setup HIP SDK
run: |
$ErrorActionPreference = "Stop"
$WarningPreference = 'SilentlyContinue'
$VerbosePreference = 'SilentlyContinue'
$DebugPreference = 'SilentlyContinue'
$ProgressPreference = 'SilentlyContinue'
Write-Host "I install AMD ROCm HIP SDK"
if (-not (Test-Path -Path "${{ github.workspace }}\tmp\rocm-install.exe" -PathType Leaf)) {
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\tmp" -ErrorAction Ignore | Out-Null
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
Move-Item -Path "${env:RUNNER_TEMP}\rocm-install.exe" -Destination "${{ github.workspace }}\tmp\rocm-install.exe"
}
Start-Process "${{ github.workspace }}\tmp\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
Write-Host "I verify AMD ROCm HIP SDK"
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
- name: Build
# disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
# build fat binary,
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
# https://llvm.org/docs/AMDGPUUsage.html.
env:
CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
AMDGPU_TARGETS: "${{ matrix.version == '5.7' && 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' || 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' }}"
run: |
Write-Host "===== BUILD ====="
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
$env:HIP_PATH = $(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)
$env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
-DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} `
-DLLAMA_OPENMP=off
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j 4
Write-Host "===== RESULT ====="
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
} else {
exit 1
}
Write-Host "===== PACKAGE ====="
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}\\out\\*.zip
name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}
windows-cuda:
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
# see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
# 12.5 ==> 12.5.0
# 11.7 ==> 11.7.1
version: [ '12.5', '11.7' ]
runs-on: windows-2022
steps:
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}\\.cache
- name: Deps
run: |
$ErrorActionPreference = "Stop"
$WarningPreference = 'SilentlyContinue'
$VerbosePreference = 'SilentlyContinue'
$DebugPreference = 'SilentlyContinue'
$ProgressPreference = 'SilentlyContinue'
choco install ccache
- name: Setup CUDA toolkit
id: cuda-toolkit
uses: Jimver/[email protected]
with:
cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}
method: 'network'
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
- name: Build
# disable OpenMP,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
# build fat binary,
# see https://developer.nvidia.com/cuda-gpus.
env:
CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}"
run: |
Write-Host "===== BUILD ====="
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
$env:CUDA_PATH = "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}"
$env:CMAKE_PREFIX_PATH = "${env:CUDA_PATH}"
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} `
-DLLAMA_OPENMP=off
cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j 4
Write-Host "===== RESULT ====="
if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
} else {
exit 1
}
Write-Host "===== PACKAGE ====="
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}\\out\\*.zip
name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}
release:
if: ${{ startsWith(github.ref, 'refs/tags/') }}
permissions:
contents: write
actions: read
id-token: write
runs-on: ubuntu-22.04
needs:
- darwin-metal
- linux-hip
- linux-cuda
- windows-hip
- windows-cuda
steps:
- name: Download Artifact
uses: actions/download-artifact@v4
with:
path: ${{ github.workspace }}/out
merge-multiple: true
- name: Release
uses: softprops/action-gh-release@v1
with:
fail_on_unmatched_files: true
tag_name: "${{ env.VERSION }}"
prerelease: ${{ contains(github.ref, 'rc') }}
files: ${{ github.workspace }}/out/*