From a73361795e3e27b50e52a244f0d2f3e7779c7b83 Mon Sep 17 00:00:00 2001 From: thxCode Date: Wed, 26 Jun 2024 17:43:02 +0800 Subject: [PATCH] refactor: cmake Signed-off-by: thxCode --- .github/workflows/ci.yml | 263 ++++++-- CMakeLists.txt | 784 ++++++++++++++++++++++++ Makefile | 375 ------------ README.md | 2 +- llama-box/CMakeLists.txt | 53 ++ llama-box/param.hpp | 1 - llama-box/scripts/gen-version-cpp.cmake | 85 +++ llama-box/scripts/version.sh | 60 -- llama-box/version.cpp.in | 3 + 9 files changed, 1130 insertions(+), 496 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 Makefile create mode 100644 llama-box/CMakeLists.txt create mode 100644 llama-box/scripts/gen-version-cpp.cmake delete mode 100755 llama-box/scripts/version.sh create mode 100644 llama-box/version.cpp.in diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3029742..0186864 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,6 +41,7 @@ jobs: fail-fast: false matrix: arch: [ amd64, arm64 ] + version: [ '3.0' ] # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images, # https://support.apple.com/en-us/102894. runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }} @@ -58,17 +59,26 @@ jobs: timeout-minutes: 5 uses: actions/cache@v3 with: - key: cache-darwin-metal-${{ matrix.arch }}-${{ hashFiles('**/.gitmodules') }} + key: cache-darwin-metal-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} path: | ${{ github.workspace }}/.cache - name: Deps run: | brew update && brew install ccache - name: Build + # disable OpenMP, + # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, + # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. 
+ env: + CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" run: | echo "===== BUILD =====" mkdir -p ${{ github.workspace }}/.cache - CCACHE_DIR=${{ github.workspace }}/.cache/ccache make -j LLAMA_METAL=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2" CXXFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }} + cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_ACCELERATE=on -DLLAMA_METAL=on -DLLAMA_METAL_EMBED_LIBRARY=on \ + ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \ + -DLLAMA_OPENMP=off + cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) echo "===== RESULT =====" if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then @@ -84,14 +94,17 @@ jobs: uses: actions/upload-artifact@v4 with: path: ${{ github.workspace }}/out/*.zip - name: llama-box-darwin-${{ matrix.arch }}-metal + name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }} linux-hip: strategy: fail-fast: false matrix: arch: [ amd64 ] - version: [ '6.0.2' ] + # see https://hub.docker.com/r/rocm/dev-ubuntu-22.04/tags. + # 6.1 ==> 6.1.2 + # 5.7 ==> 5.7.1 + version: [ '6.1', '5.7' ] runs-on: ubuntu-22.04 steps: - name: Maximize Space @@ -121,40 +134,48 @@ jobs: image: tonistiigi/binfmt:qemu-v7.0.0 platforms: "arm64" - name: Build - # disable OpenMP to support static linking, + # disable OpenMP, # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. # build fat binary, # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, # https://llvm.org/docs/AMDGPUUsage.html. + # official gpu support list, + # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.1.2/reference/system-requirements.html, + # https://rocm.docs.amd.com/en/docs-5.7.1/release/gpu_os_support.html. 
env: - GPU_TARGETS: "gfx803 gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1010 gfx1030 gfx1100 gfx1101 gfx1102" + CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" + AMDGPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" run: | echo "===== SCRIPT =====" cat <<EOF > /tmp/entrypoint.sh #!/bin/bash - apt-get update && apt-get install -y build-essential git libgomp1 ccache - git config --global --add safe.directory /workspace/llama.cpp - make -j LLAMA_HIPBLAS=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2" CXXFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }} + apt-get update && apt-get install -y build-essential git cmake ccache + git config --system --add safe.directory '*' + mkdir -p ${{ github.workspace }}/.cache + cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \ + ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \ + -DLLAMA_OPENMP=off + cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) EOF chmod +x /tmp/entrypoint.sh cat /tmp/entrypoint.sh echo "===== BUILD =====" - mkdir -p ${{ github.workspace }}/.cache docker run \ --rm \ --privileged \ --platform linux/${{ matrix.arch }} \ - --volume ${{ github.workspace }}:/workspace \ - --volume /tmp/entrypoint.sh:/entrypoint.sh \ - --entrypoint /entrypoint.sh \ - --workdir /workspace \ - --env CCACHE_DIR=/workspace/.cache/ccache \ --env CC=/opt/rocm/llvm/bin/clang \ --env CXX=/opt/rocm/llvm/bin/clang++ \ - --env AMDGPU_TARGETS="${{ env.GPU_TARGETS }}" \ - rocm/dev-ubuntu-22.04:${{ matrix.version }}-complete + --env CCACHE_DIR \ + --env AMDGPU_TARGETS \ + --volume /tmp/entrypoint.sh:/entrypoint.sh \ + --entrypoint /entrypoint.sh \ + rocm/dev-ubuntu-22.04:${{ 
matrix.version == '6.1' && '6.1.2' || '5.7.1' }}-complete echo "===== RESULT =====" if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then @@ -177,7 +198,10 @@ jobs: fail-fast: false matrix: arch: [ amd64 ] - version: [ '12.2.0', '11.7.1' ] + # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel. + # 12.5 ==> 12.5.0 + # 11.7 ==> 11.7.1 + version: [ '12.5', '11.7' ] runs-on: ubuntu-22.04 steps: - name: Maximize Space @@ -207,33 +231,42 @@ jobs: image: tonistiigi/binfmt:qemu-v7.0.0 platforms: "arm64" - name: Build - # disable OpenMP to support static linking, + # disable OpenMP, # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. + # build fat binary, + # see https://developer.nvidia.com/cuda-gpus. + env: + CCACHE_DIR: "${{ github.workspace }}/.cache/ccache" + CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}" run: | echo "===== SCRIPT =====" cat <<EOF > /tmp/entrypoint.sh #!/bin/bash - apt-get update && apt-get install -y build-essential git libgomp1 ccache - git config --global --add safe.directory /workspace/llama.cpp - make -j LLAMA_CUDA=1 LLAMA_NO_OPENMP=1 LLAMA_CUDA_FORCE_MMQ=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2" CXXFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }} + apt-get update && apt-get install -y build-essential git cmake ccache + git config --system --add safe.directory '*' + mkdir -p ${{ github.workspace }}/.cache + cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \ + -DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \ + ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} \ + -DLLAMA_OPENMP=off + cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc) EOF chmod +x 
/tmp/entrypoint.sh cat /tmp/entrypoint.sh echo "===== BUILD =====" - mkdir -p ./.cache docker run \ --rm \ --privileged \ --platform linux/${{ matrix.arch }} \ - --volume ${{ github.workspace }}:/workspace \ - --workdir /workspace \ + --volume ${{ github.workspace }}:${{ github.workspace }} \ + --workdir ${{ github.workspace }} \ + --env CCACHE_DIR \ + --env CUDA_ARCHITECTURES \ --volume /tmp/entrypoint.sh:/entrypoint.sh \ --entrypoint /entrypoint.sh \ - --env CCACHE_DIR=/workspace/.cache/ccache \ - --env CUDA_DOCKER_ARCH=all \ - nvidia/cuda:${{ matrix.version }}-devel-ubuntu22.04 + nvidia/cuda:${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }}-devel-ubuntu22.04 echo "===== RESULT =====" if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then @@ -251,15 +284,15 @@ jobs: path: ${{ github.workspace }}/out/*.zip name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }} - # wip windows-hip: - if: ${{ false }} strategy: fail-fast: false matrix: arch: [ amd64 ] # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html. 
- version: [ '6.0.2' ] + # 5.7 ==> 5.7.1 + # 5.5 ==> 5.5.1 + version: [ '5.7', '5.5' ] runs-on: windows-2022 steps: - name: Clone @@ -273,8 +306,18 @@ jobs: with: key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} path: | - ${{ github.workspace }}/.cache + ${{ github.workspace }}\.cache + ${{ github.workspace }}\tmp - name: Deps + run: | + $ErrorActionPreference = "Stop" + $WarningPreference = 'SilentlyContinue' + $VerbosePreference = 'SilentlyContinue' + $DebugPreference = 'SilentlyContinue' + $ProgressPreference = 'SilentlyContinue' + + choco install ccache + - name: Setup HIP SDK run: | $ErrorActionPreference = "Stop" $WarningPreference = 'SilentlyContinue' @@ -282,41 +325,141 @@ jobs: $DebugPreference = 'SilentlyContinue' $ProgressPreference = 'SilentlyContinue' - Write-Host "I install necessary tools" - choco install mingw ccache - - Write-Host "I verify neccessary tools" - make.exe --version - - Write-Host "I install AMD HIP SDK" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ startsWith(matrix.version, '6') && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait - - Write-Host "I configure the PATH environment variable" - $path = $(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path -Parent) - $newMachinePath = $path + ";" + [System.Environment]::GetEnvironmentVariable("Path","Machine") - [Environment]::SetEnvironmentVariable("Path", $newMachinePath, [System.EnvironmentVariableTarget]::Machine) - $env:Path = $path + ";" + $env:Path - - Write-Host "I verify AMD HIP clang" - clang.exe --version + Write-Host "I install AMD ROCm HIP SDK" + if (-not (Test-Path -Path "${{ github.workspace }}\tmp\rocm-install.exe" -PathType Leaf)) { + New-Item -Force -ItemType Directory -Path "${{ github.workspace 
}}\tmp" -ErrorAction Ignore | Out-Null + Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ matrix.version == '5.7' && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" + Move-Item -Path "${env:RUNNER_TEMP}\rocm-install.exe" -Destination "${{ github.workspace }}\tmp\rocm-install.exe" + } + Start-Process "${{ github.workspace }}\tmp\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait + + Write-Host "I verify AMD ROCm HIP SDK" + & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - name: Build + # disable OpenMP, + # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, + # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. + # build fat binary, + # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878, + # https://llvm.org/docs/AMDGPUUsage.html. + # official gpu support list, + # see https://rocm.docs.amd.com/en/docs-5.7.1/release/windows_support.html, + # https://rocm.docs.amd.com/en/docs-5.5.1/release/windows_support.html. 
env: - GPU_TARGETS: "gfx803 gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1010 gfx1030 gfx1100 gfx1101 gfx1102" + CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache" + AMDGPU_TARGETS: "${{ matrix.version == '5.7' && 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' || 'gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102' }}" run: | - Write-Host "===== BUILD =====" - New-Item -Force -ItemType Directory -Path "${{ github.workspace }}/.cache" -ErrorAction Ignore | Out-Null - $env:CCACHE_DIR = "${{ github.workspace }}/.cache/ccache" - $env:CC = "clang" - $env:CXX = "clang++" - make -j LLAMA_HIPBLAS=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }} + Write-Host "===== BUILD =====" + New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null + $env:HIP_PATH = $(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path) + $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}" + cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DLLAMA_HIPBLAS=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" ` + ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} ` + -DLLAMA_OPENMP=off + cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS} Write-Host "===== RESULT =====" - if (Test-Path -Path "${{ github.workspace }}/build/bin/llama-box.exe") { - llvm-objdump.exe -p "${{ github.workspace }}/build/bin/llama-box.exe" + if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { + try { + Write-Host "===== LLVM-OBJDUMP =====" + llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" + } catch {} + try { + Write-Host 
"===== DUMPBIN =====" + dumpbin /DEPENDENTS "${{ github.workspace }}\build\bin\llama-box.exe" + } catch {} + try { + Write-Host "===== OBJDUMP =====" + objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe" + } catch {} } else { exit 1 } + + Write-Host "===== PACKAGE =====" + New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null + Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip" + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + path: ${{ github.workspace }}\\out\\*.zip + name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }} + + windows-cuda: + strategy: + fail-fast: false + matrix: + arch: [ amd64 ] + # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network. + # 12.5 ==> 12.5.0 + # 11.7 ==> 11.7.1 + version: [ '12.5', '11.7' ] + runs-on: windows-2022 + steps: + - name: Clone + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'recursive' + - name: Setup Cache + timeout-minutes: 5 + uses: actions/cache@v3 + with: + key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }} + path: | + ${{ github.workspace }}\\.cache + - name: Deps + run: | + $ErrorActionPreference = "Stop" + $WarningPreference = 'SilentlyContinue' + $VerbosePreference = 'SilentlyContinue' + $DebugPreference = 'SilentlyContinue' + $ProgressPreference = 'SilentlyContinue' + + choco install ccache + - name: Setup CUDA toolkit + id: cuda-toolkit + uses: Jimver/cuda-toolkit@v0.2.16 + with: + cuda: ${{ matrix.version == '12.5' && '12.5.0' || '11.7.1' }} + method: 'network' + sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]' + - name: Build + # disable OpenMP, + # see 
https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691, + # https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216. + # build fat binary, + # see https://developer.nvidia.com/cuda-gpus. + env: + CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache" + CUDA_ARCHITECTURES: "${{ startsWith(matrix.version, '12.') && '50;52;53;60;61;62;70;72;75;80;86;87;89;90' || '50;52;53;60;61;62;70;72;75;80;86;87' }}" + run: | + Write-Host "===== BUILD =====" + New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null + $env:CUDA_PATH = "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}" + $env:CMAKE_PREFIX_PATH = "${env:CUDA_PATH}" + cmake -G "NMake Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release ` + -DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" ` + ${{ matrix.arch == 'amd64' && '-DLLAMA_NATIVE=off' || '-DLLAMA_NATIVE=on' }} ` + -DLLAMA_OPENMP=off + cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS} + + Write-Host "===== RESULT =====" + if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") { + } else { + exit 1 + } + + Write-Host "===== PACKAGE =====" + New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null + Compress-Archive -Path "${{ github.workspace }}\build\bin\*" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip" + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + path: ${{ github.workspace }}\\out\\*.zip + name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }} release: if: ${{ startsWith(github.ref, 'refs/tags/') }} @@ -329,6 +472,8 @@ jobs: - darwin-metal - linux-hip - linux-cuda + - windows-hip + - windows-cuda steps: - name: Download Artifact uses: actions/download-artifact@v4 diff --git 
a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..13187c6 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,784 @@ +cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. +project("llama-box" C CXX) +include(CheckIncludeFileCXX) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + + +# +# Option list +# + +if (APPLE) + set(LLAMA_METAL_DEFAULT ON) + set(LLAMA_BLAS_DEFAULT ON) + set(LLAMA_BLAS_VENDOR_DEFAULT "Apple") +else() + set(LLAMA_METAL_DEFAULT OFF) + set(LLAMA_BLAS_DEFAULT OFF) + set(LLAMA_BLAS_VENDOR_DEFAULT "Generic") +endif() + +# general +option(BUILD_SHARED_LIBS "build shared libraries" OFF) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" ON) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# instruction set specific +if (LLAMA_NATIVE) + set(INS_ENB OFF) +else() + set(INS_ENB ON) +endif() + +option(LLAMA_SVE "llama: enable SVE" OFF) +option(LLAMA_AVX "llama: enable AVX" ${INS_ENB}) +option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB}) +option(LLAMA_AVX512 "llama: enable 
AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_AVX512_BF16 "llama: enable AVX512-BF16" OFF) +option(LLAMA_FMA "llama: enable FMA" ${INS_ENB}) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" ${INS_ENB}) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT}) +set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING + "llama: BLAS library vendor") +option(LLAMA_CUDA "llama: use CUDA" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF) +option(LLAMA_CUDA_NO_VMM "llama: do not try to use CUDA VMM" OFF) +option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for FlashAttention" OFF) + +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING + "llama: metal minimum macOS version") +set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)") +option(LLAMA_OPENMP "llama: use OpenMP" ON) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device") + +set(LLAMA_BUILD_TESTS OFF CACHE BOOL "llama: build tests") +set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "llama: build examples") +set(LLAMA_BUILD_SERVER OFF CACHE BOOL "llama: build server example") +option(LLAMA_LASX "llama: enable lasx" ON) +option(LLAMA_LSX "llama: enable lsx" ON) + +# add perf arguments +set(LLAMA_PERF OFF CACHE BOOL "llama: enable perf") + +# +# Compile flags +# + +if (LLAMA_SYCL) + set(CMAKE_CXX_STANDARD 17) +else() + set(CMAKE_CXX_STANDARD 11) +endif() + +set(CMAKE_CXX_STANDARD_REQUIRED true) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED true) +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(Threads REQUIRED) +include(CheckCXXCompilerFlag) + +# enable libstdc++ assertions for debug builds +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) +endif() + +if (NOT MSVC) + if (LLAMA_SANITIZE_THREAD) + add_compile_options(-fsanitize=thread) + 
link_libraries (-fsanitize=thread) + endif() + + if (LLAMA_SANITIZE_ADDRESS) + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + link_libraries (-fsanitize=address) + endif() + + if (LLAMA_SANITIZE_UNDEFINED) + add_compile_options(-fsanitize=undefined) + link_libraries (-fsanitize=undefined) + endif() +endif() + +if (APPLE AND LLAMA_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate) + + if (ACCELERATE_FRAMEWORK) + message(STATUS "Accelerate framework found") + + add_compile_definitions(ACCELERATE_NEW_LAPACK) + add_compile_definitions(ACCELERATE_LAPACK_ILP64) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) + else() + message(WARNING "Accelerate framework not found") + endif() +endif() + +if (LLAMA_METAL) + find_library(FOUNDATION_LIBRARY Foundation REQUIRED) + find_library(METAL_FRAMEWORK Metal REQUIRED) + find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) + + message(STATUS "Metal framework found") + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} + ${FOUNDATION_LIBRARY} + ${METAL_FRAMEWORK} + ${METALKIT_FRAMEWORK} + ) +endif() + +if (LLAMA_OPENMP) + find_package(OpenMP) + + if (OpenMP_FOUND) + message(STATUS "OpenMP found, Libraries: ${OpenMP_LIBRARIES}") + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) + else() + message(WARNING "OpenMP not found") + endif() +endif() + +if (LLAMA_BLAS) + if (LLAMA_STATIC) + set(BLA_STATIC ON) + endif() + #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) + # set(BLA_SIZEOF_INTEGER 8) + #endif() + + set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) + find_package(BLAS) + + if (BLAS_FOUND) + message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") + + if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple")) + # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. 
+ # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 + find_package(PkgConfig REQUIRED) + if (${LLAMA_BLAS_VENDOR} MATCHES "Generic") + pkg_check_modules(DepBLAS REQUIRED blas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS") + # As of openblas v0.3.22, the 64-bit is named openblas64.pc + pkg_check_modules(DepBLAS openblas64) + if (NOT DepBLAS_FOUND) + pkg_check_modules(DepBLAS REQUIRED openblas) + endif() + elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME") + pkg_check_modules(DepBLAS REQUIRED blis) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS") + pkg_check_modules(DepBLAS REQUIRED blas-atlas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS") + pkg_check_modules(DepBLAS REQUIRED flexiblas_api) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel") + # all Intel* libraries share the same include path + pkg_check_modules(DepBLAS REQUIRED mkl-sdl) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC") + # this doesn't provide pkg-config + # suggest to assign BLAS_INCLUDE_DIRS on your own + if ("${NVHPC_VERSION}" STREQUAL "") + message(WARNING "Better to set NVHPC_VERSION") + else() + set(DepBLAS_FOUND ON) + set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") + endif() + endif() + if (DepBLAS_FOUND) + set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" + " detected by pkgconfig, trying to find cblas.h from possible paths...") + find_path(BLAS_INCLUDE_DIRS + NAMES cblas.h + HINTS + /usr/include + /usr/local/include + /usr/include/openblas + /opt/homebrew/opt/openblas/include + /usr/local/opt/openblas/include + /usr/include/x86_64-linux-gnu/openblas/include + ) + endif() + endif() + + message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") + + add_compile_options(${BLAS_LINKER_FLAGS}) + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} 
${BLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS not found, please refer to " + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") + endif() +endif() + +if (LLAMA_CUDA) + cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES + + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + message(STATUS "CUDA found") + + if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + # 52 == lowest CUDA 12 standard + # 60 == f16 CUDA intrinsics + # 61 == integer CUDA intrinsics + # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster + if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) + set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics + else() + set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work + endif() + endif() + message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") + + enable_language(CUDA) + + if (LLAMA_STATIC) + if (WIN32) + # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt) + else () + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + endif() + else() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + endif() + + if (LLAMA_CUDA_NO_VMM) + # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so) + else() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... 
+ endif() + else() + message(WARNING "CUDA not found") + endif() +endif() + +if (LLAMA_HIPBLAS) + if (NOT EXISTS $ENV{ROCM_PATH}) + if (NOT EXISTS /opt/rocm) + set(ROCM_PATH /usr) + else() + set(ROCM_PATH /opt/rocm) + endif() + else() + set(ROCM_PATH $ENV{ROCM_PATH}) + endif() + list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) + list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake") + + # CMake on Windows doesn't support the HIP language yet + if(WIN32) + set(CXX_IS_HIPCC TRUE) + else() + string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}") + endif() + + if(CXX_IS_HIPCC) + if(LINUX) + if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") + endif() + + message(WARNING "Setting hipcc as the C++ compiler is legacy behavior." + " Prefer setting the HIP compiler directly. See README for details.") + endif() + else() + # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES. + if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) + set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS}) + endif() + cmake_minimum_required(VERSION 3.21) + enable_language(HIP) + endif() + find_package(hip REQUIRED) + find_package(hipblas REQUIRED) + find_package(rocblas REQUIRED) + + message(STATUS "HIP and hipBLAS found") + + if (CXX_IS_HIPCC) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device) + endif() + + if (LLAMA_STATIC) + message(FATAL_ERROR "Static linking not supported for HIP/ROCm") + endif() + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas) +endif() + +if (LLAMA_SYCL) + if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$") + message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA") + endif() + + if (NOT DEFINED ENV{ONEAPI_ROOT}) + message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") + endif() + #todo: AOT + + find_package(IntelSYCL REQUIRED) + + 
message(STATUS "SYCL found") + + add_compile_options(-I./) #include DPCT + add_compile_options(-I/${SYCL_INCLUDE_DIR}) + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") + if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") + endif() + + if (WIN32) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) + else() + if (LLAMA_SYCL_TARGET STREQUAL "INTEL") + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) + elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl) + endif() + endif() +endif() + +function(get_flags CCID CCVER) + set(C_FLAGS "") + set(CXX_FLAGS "") + + if (CCID MATCHES "Clang") + set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return) + set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) + + if ( + (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR + (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) + ) + list(APPEND C_FLAGS -Wdouble-promotion) + endif() + elseif (CCID STREQUAL "GNU") + set(C_FLAGS -Wdouble-promotion) + set(CXX_FLAGS -Wno-array-bounds) + + if (CCVER VERSION_GREATER_EQUAL 7.1.0) + list(APPEND CXX_FLAGS -Wno-format-truncation) + endif() + if (CCVER VERSION_GREATER_EQUAL 8.1.0) + list(APPEND CXX_FLAGS -Wextra-semi) + endif() + endif() + + set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE) + set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) +endfunction() + +if (LLAMA_FATAL_WARNINGS) + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND C_FLAGS -Werror) + list(APPEND CXX_FLAGS -Werror) + elseif 
(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/WX) + endif() +endif() + +if (LLAMA_ALL_WARNINGS) + if (NOT MSVC) + list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) + list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes + -Werror=implicit-int -Werror=implicit-function-declaration) + list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) + + list(APPEND C_FLAGS ${WARNING_FLAGS}) + list(APPEND CXX_FLAGS ${WARNING_FLAGS}) + + get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) + + add_compile_options("$<$:${C_FLAGS};${GF_C_FLAGS}>" + "$<$:${CXX_FLAGS};${GF_CXX_FLAGS}>") + else() + # todo : msvc + set(C_FLAGS "") + set(CXX_FLAGS "") + endif() +endif() + +set(CUDA_CXX_FLAGS "") + +if (LLAMA_CUDA) + set(CUDA_FLAGS -use_fast_math) + + if (LLAMA_FATAL_WARNINGS) + list(APPEND CUDA_FLAGS -Werror all-warnings) + endif() + + if (LLAMA_ALL_WARNINGS AND NOT MSVC) + set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) + if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") + list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER}) + endif() + + execute_process( + COMMAND ${NVCC_CMD} -Xcompiler --version + OUTPUT_VARIABLE CUDA_CCFULLVER + ERROR_QUIET + ) + + if (NOT CUDA_CCFULLVER MATCHES clang) + set(CUDA_CCID "GNU") + execute_process( + COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion" + OUTPUT_VARIABLE CUDA_CCVER + ERROR_QUIET + ) + else() + if (CUDA_CCFULLVER MATCHES Apple) + set(CUDA_CCID "AppleClang") + else() + set(CUDA_CCID "Clang") + endif() + string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER}) + endif() + + message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") + + get_flags(${CUDA_CCID} ${CUDA_CCVER}) + list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later + endif() + + if (NOT MSVC) + list(APPEND CUDA_CXX_FLAGS -Wno-pedantic) + endif() +endif() + +if (WIN32) + 
add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() +endif() + +if (LLAMA_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(WARNING "IPO is not supported: ${output}") + endif() +endif() + +if (LLAMA_CCACHE) + find_program(LLAMA_CCACHE_FOUND ccache) + if (LLAMA_CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set(ENV{CCACHE_SLOPPINESS} time_macros) + message(STATUS "ccache found, compilation results will be cached. Disable with LLAMA_CCACHE=OFF.") + else() + message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF") + endif () +endif() + +# this version of Apple ld64 is buggy +execute_process( + COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v + ERROR_VARIABLE output + OUTPUT_QUIET +) + +if (output MATCHES "dyld-1015\.7") + add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) +endif() + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (MSVC) + string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) + message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") +else () + set(CMAKE_GENERATOR_PLATFORM_LWR "") +endif () + +if (NOT MSVC) + if (LLAMA_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (LLAMA_GPROF) + add_compile_options(-pg) + endif() +endif() + +set(ARCH_FLAGS "") + +if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + 
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")) + message(STATUS "ARM detected") + if (MSVC) + add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead + add_compile_definitions(__ARM_NEON) + add_compile_definitions(__ARM_FEATURE_FMA) + + set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS}) + string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2") + check_cxx_source_compiles("#include \nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD) + if (GGML_COMPILER_SUPPORT_DOTPROD) + add_compile_definitions(__ARM_FEATURE_DOTPROD) + endif () + check_cxx_source_compiles("#include \nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) + if (GGML_COMPILER_SUPPORT_MATMUL_INT8) + add_compile_definitions(__ARM_FEATURE_MATMUL_INT8) + endif () + + check_cxx_source_compiles("#include \nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) + if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) + add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + endif () + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV}) + else() + check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) + if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") + list(APPEND ARCH_FLAGS -mfp16-format=ieee) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") + # Raspberry Pi 1, Zero + list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android") + # Android armeabi-v7a + list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations) + else() + # Raspberry Pi 2 + list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations) + endif() + endif() + if 
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") + # Android arm64-v8a + # Raspberry Pi 3, 4, Zero 2 (32-bit) + list(APPEND ARCH_FLAGS -mno-unaligned-access) + endif() + if (LLAMA_SVE) + list(APPEND ARCH_FLAGS -march=armv8.6-a+sve) + endif() + endif() +elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$")) + message(STATUS "x86 detected") + if (MSVC) + # instruction set detection for MSVC only + if (LLAMA_NATIVE) + include(cmake/FindSIMD.cmake) + endif () + if (LLAMA_AVX512) + list(APPEND ARCH_FLAGS /arch:AVX512) + # MSVC has no compile-time flags enabling specific + # AVX512 extensions, neither it defines the + # macros corresponding to the extensions. + # Do it manually. + if (LLAMA_AVX512_VBMI) + add_compile_definitions($<$:__AVX512VBMI__>) + add_compile_definitions($<$:__AVX512VBMI__>) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_definitions($<$:__AVX512VNNI__>) + add_compile_definitions($<$:__AVX512VNNI__>) + endif() + if (LLAMA_AVX512_BF16) + add_compile_definitions($<$:__AVX512BF16__>) + add_compile_definitions($<$:__AVX512BF16__>) + endif() + elseif (LLAMA_AVX2) + list(APPEND ARCH_FLAGS /arch:AVX2) + elseif (LLAMA_AVX) + list(APPEND ARCH_FLAGS /arch:AVX) + endif() + else() + if (LLAMA_NATIVE) + list(APPEND ARCH_FLAGS -march=native) + endif() + if (LLAMA_F16C) + list(APPEND ARCH_FLAGS -mf16c) + endif() + if (LLAMA_FMA) + list(APPEND ARCH_FLAGS -mfma) + endif() + if (LLAMA_AVX) + list(APPEND ARCH_FLAGS -mavx) + endif() + if (LLAMA_AVX2) + list(APPEND ARCH_FLAGS -mavx2) + endif() + if (LLAMA_AVX512) + list(APPEND ARCH_FLAGS -mavx512f) + list(APPEND ARCH_FLAGS -mavx512bw) + endif() + if (LLAMA_AVX512_VBMI) + list(APPEND ARCH_FLAGS -mavx512vbmi) + endif() + if (LLAMA_AVX512_VNNI) + list(APPEND ARCH_FLAGS -mavx512vnni) + endif() + if (LLAMA_AVX512_BF16) + 
list(APPEND ARCH_FLAGS -mavx512bf16) + endif() + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") + message(STATUS "PowerPC detected") + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le") + list(APPEND ARCH_FLAGS -mcpu=powerpc64le) + else() + list(APPEND ARCH_FLAGS -mcpu=native -mtune=native) + #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") + message(STATUS "loongarch64 detected") + + list(APPEND ARCH_FLAGS -march=loongarch64) + if (LLAMA_LASX) + list(APPEND ARCH_FLAGS -mlasx) + endif() + if (LLAMA_LSX) + list(APPEND ARCH_FLAGS -mlsx) + endif() + +else() + message(STATUS "Unknown architecture") +endif() + +add_compile_options("$<$:${ARCH_FLAGS}>") +add_compile_options("$<$:${ARCH_FLAGS}>") + +if (LLAMA_CUDA) + list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS}) + list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument + if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "") + list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED}) + endif() + add_compile_options("$<$:${CUDA_FLAGS}>") +endif() + +if (MINGW) + # Target Windows 8 for PrefetchVirtualMemory + add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER}) +endif() + +# +# POSIX conformance +# + +# clock_gettime came in POSIX.1b (1993) +# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional +# posix_memalign came in POSIX.1-2001 / SUSv3 +# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) +add_compile_definitions(_XOPEN_SOURCE=600) + +# Somehow in OpenBSD whenever POSIX conformance is specified +# some string functions rely on locale_t availability, +# which was introduced in POSIX.1-2008, forcing us to go higher +if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + remove_definitions(-D_XOPEN_SOURCE=600) + add_compile_definitions(_XOPEN_SOURCE=700) +endif() + +# Data types, macros and functions related to controlling CPU 
affinity and +# some memory allocation are available on Linux through GNU extensions in libc +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + add_compile_definitions(_GNU_SOURCE) +endif() + +# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, +# and on macOS its availability depends on enabling Darwin extensions +# similarly on DragonFly, enabling BSD extensions is necessary +if ( + CMAKE_SYSTEM_NAME MATCHES "Darwin" OR + CMAKE_SYSTEM_NAME MATCHES "iOS" OR + CMAKE_SYSTEM_NAME MATCHES "tvOS" OR + CMAKE_SYSTEM_NAME MATCHES "DragonFly" +) + add_compile_definitions(_DARWIN_C_SOURCE) +endif() + +# alloca is a non-standard interface that is not visible on BSDs when +# POSIX conformance is specified, but not all of them provide a clean way +# to enable it in such cases +if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + add_compile_definitions(__BSD_VISIBLE) +endif() +if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") + add_compile_definitions(_NETBSD_SOURCE) +endif() +if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + add_compile_definitions(_BSD_SOURCE) +endif() + +# +# programs +# + +add_subdirectory(llama.cpp) +add_subdirectory(llama-box) diff --git a/Makefile b/Makefile deleted file mode 100644 index aae7df8..0000000 --- a/Makefile +++ /dev/null @@ -1,375 +0,0 @@ -# Inspired by https://github.com/ggerganov/llama.cpp/blob/61665277afde2add00c0d387acb94ed5feb95917/Makefile. 
- -.DEFAULT_GOAL := build - -SHELL := /bin/bash - -MK_DIR := $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))) -MK_FLAGS:= $(wordlist 3, $(words $(MAKEFLAGS)), $(MAKEFLAGS)) - -# -# System flags -# - -ifndef UNAME_S - UNAME_S := $(shell uname -s) -endif -ifndef UNAME_P - UNAME_P := $(shell uname -p) -endif -ifndef UNAME_M - UNAME_M := $(shell uname -m) -endif - -ifeq ($(origin CC),default) - CC := cc -endif -ifeq ($(origin CXX),default) - CXX := c++ -endif -ifdef LLAMA_CUDA - ifdef LLAMA_CUDA_NVCC - NVCC := $(LLAMA_CUDA_NVCC) - else - NVCC := nvcc - endif -endif - -ifndef LLAMA_NO_CCACHE - CCACHE := $(shell which ccache) - ifdef CCACHE - export CCACHE_SLOPPINESS = time_macros - CC := $(CCACHE) $(CC) - CXX := $(CCACHE) $(CXX) - NVCC := $(CCACHE) $(NVCC) - endif -endif - -## Mac OS + Arm can report x86_64 -## ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 -ifeq ($(UNAME_S),Darwin) - ifndef LLAMA_NO_METAL - LLAMA_METAL := 1 - endif - LLAMA_NO_OPENMP := 1 # OpenMP is not supported on macOS - ifneq ($(UNAME_P),arm) - SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null) - ifeq ($(SYSCTL_M),1) - # UNAME_P := arm - # UNAME_M := arm64 - warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. 
For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789) - endif - endif -endif -ifdef LLAMA_METAL - MK_FLAGS += " LLAMA_METAL_EMBED_LIBRARY=1" -endif - -# -# Compile flags -# - -## standard -MK_CPPFLAGS = -I$(MK_DIR) -I$(MK_DIR)/llama.cpp -I$(MK_DIR)/llama.cpp/common -MK_CFLAGS = -std=c11 -fPIC -MK_CXXFLAGS = -std=c++11 -fPIC - -## clock_gettime came in POSIX.1b (1993) -## CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional -## posix_memalign came in POSIX.1-2001 / SUSv3 -## M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) -MK_CPPFLAGS += -D_XOPEN_SOURCE=600 - -## Somehow in OpenBSD whenever POSIX conformance is specified -## some string functions rely on locale_t availability, -## which was introduced in POSIX.1-2008, forcing us to go higher -ifeq ($(UNAME_S),OpenBSD) - MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700 -endif - -## RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, -## and on macOS its availability depends on enabling Darwin extensions -## similarly on DragonFly, enabling BSD extensions is necessary -ifeq ($(UNAME_S),Darwin) - MK_CPPFLAGS += -D_DARWIN_C_SOURCE -endif -ifeq ($(UNAME_S),DragonFly) - MK_CPPFLAGS += -D__BSD_VISIBLE -endif - -## alloca is a non-standard interface that is not visible on BSDs when -## POSIX conformance is specified, but not all of them provide a clean way -## to enable it in such cases -ifeq ($(UNAME_S),FreeBSD) - MK_CPPFLAGS += -D__BSD_VISIBLE -endif -ifeq ($(UNAME_S),NetBSD) - MK_CPPFLAGS += -D_NETBSD_SOURCE -endif -ifeq ($(UNAME_S),OpenBSD) - MK_CPPFLAGS += -D_BSD_SOURCE -endif - -## debug or optimization -ifdef LLAMA_DEBUG - MK_CFLAGS += -O0 -g - MK_CXXFLAGS += -O0 -g - MK_LDFLAGS += -g - ifeq ($(UNAME_S),Darwin) - MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS - endif -else - MK_CPPFLAGS += -DNDEBUG - ifdef LLAMA_FAST - MK_CFLAGS += -Ofast - MK_CXXFLAGS += -Ofast - else - MK_CFLAGS += -O3 - MK_CXXFLAGS += -O3 - endif -endif - -## warning -MK_CFLAGS += 
-Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function \ - -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \ - -Werror=implicit-function-declaration -MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function \ - -Wmissing-declarations -Wmissing-noreturn -ifdef LLAMA_FATAL_WARNINGS - MK_CFLAGS += -Werror - MK_CXXFLAGS += -Werror -endif - -## os specific -### thread -ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)' - MK_CFLAGS += -pthread - MK_CXXFLAGS += -pthread -endif -### windows -ifneq ($(findstring _NT,$(UNAME_S)),) - _WIN32 := 1 - LWINSOCK2 := -lws2_32 -endif - -## arch specific -ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) - # Use all CPU extensions that are available: - MK_CFLAGS += -march=native -mtune=native - HOST_CXXFLAGS += -march=native -mtune=native - - # Usage AVX-only - #MK_CFLAGS += -mfma -mf16c -mavx - #MK_CXXFLAGS += -mfma -mf16c -mavx - - # Usage SSSE3-only (Not is SSE3!) - #MK_CFLAGS += -mssse3 - #MK_CXXFLAGS += -mssse3 -endif -ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))' - # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves. - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412 - # https://github.com/ggerganov/llama.cpp/issues/2922 - MK_CFLAGS += -Xassembler -muse-unaligned-vector-move - MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move - - # Target Windows 8 for PrefetchVirtualMemory - MK_CPPFLAGS += -D_WIN32_WINNT=0x602 -endif -ifneq ($(filter aarch64%,$(UNAME_M)),) - # Apple M1, M2, etc. 
- # Raspberry Pi 3, 4, Zero 2 (64-bit) - # Nvidia Jetson - MK_CFLAGS += -mcpu=native - MK_CXXFLAGS += -mcpu=native - JETSON_RELEASE_INFO = $(shell jetson_release) - ifdef JETSON_RELEASE_INFO - ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),) - CC = aarch64-unknown-linux-gnu-gcc - cxx = aarch64-unknown-linux-gnu-g++ - endif - endif -endif -ifneq ($(filter armv6%,$(UNAME_M)),) - # Raspberry Pi 1, Zero - MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access - MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -endif -ifneq ($(filter armv7%,$(UNAME_M)),) - # Raspberry Pi 2 - MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations - MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations -endif -ifneq ($(filter armv8%,$(UNAME_M)),) - # Raspberry Pi 3, 4, Zero 2 (32-bit) - MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access - MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access -endif -ifneq ($(filter ppc64%,$(UNAME_M)),) - POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) - ifneq (,$(findstring POWER9,$(POWER9_M))) - MK_CFLAGS += -mcpu=power9 - MK_CXXFLAGS += -mcpu=power9 - endif -endif -ifneq ($(filter ppc64le%,$(UNAME_M)),) - MK_CFLAGS += -mcpu=powerpc64le - MK_CXXFLAGS += -mcpu=powerpc64le - CUDA_POWER_ARCH = 1 -endif -ifneq ($(filter loongarch64%,$(UNAME_M)),) - MK_CFLAGS += -mlasx - MK_CXXFLAGS += -mlasx -endif -ifneq ($(filter riscv64%,$(UNAME_M)),) - MK_CFLAGS += -march=rv64gcv -mabi=lp64d - MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d -endif - -## platform specific -### apple metal -ifdef LLAMA_METAL - MK_LDFLAGS += -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders -endif -### cuda -ifdef LLAMA_CUDA - ifneq ($(wildcard /opt/cuda),) - CUDA_PATH ?= /opt/cuda - else - CUDA_PATH ?= /usr/local/cuda - endif - MK_CPPFLAGS += -I$(CUDA_PATH)/include 
-I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include - MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib - ifneq ($(filter aarch64%,$(UNAME_M)),) - ifneq ($(wildcard $(CUDA_PATH)/targets/sbsa-linux),) - MK_CPPFLAGS += -I$(CUDA_PATH)/targets/sbsa-linux/include - MK_LDFLAGS += -L$(CUDA_PATH)/targets/sbsa-linux/lib - endif - endif -endif -### hipblas -ifdef LLAMA_HIPBLAS - ifeq ($(wildcard /opt/rocm),) - ROCM_PATH ?= /usr - else - ROCM_PATH ?= /opt/rocm - endif - MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib - MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 - MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu -endif -### openmp -ifndef LLAMA_NO_OPENMP - # OpenMP cannot be statically linked. - MK_CFLAGS += -fopenmp - MK_CXXFLAGS += -fopenmp -endif -### openblas -ifdef LLAMA_OPENBLAS - MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas) - MK_LDFLAGS += $(shell pkg-config --libs openblas) -endif -### openblas64 -ifdef LLAMA_OPENBLAS64 - MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64) - MK_LDFLAGS += $(shell pkg-config --libs openblas64) -endif -### blis -ifdef LLAMA_BLIS - MK_LDFLAGS += -lblis -L/usr/local/lib -endif - -## get compiler flags -GF_CC := $(CC) -ifdef LLAMA_CUDA - GF_CC := $(NVCC) -std=c++11 2>/dev/null .c -Xcompiler -endif -include $(MK_DIR)/llama.cpp/scripts/get-flags.mk - -## combine build flags with cmdline overrides -override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) -override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) -override CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS) -override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) - -# -# Helper function -# - -## BUILD_INFO prints out the build info -define BUILD_INFO - @echo "I llama-box build info:" - @echo "I 
UNAME_S: $(UNAME_S)" - @echo "I UNAME_P: $(UNAME_P)" - @echo "I UNAME_M: $(UNAME_M)" - @echo "I CFLAGS: $(CFLAGS)" - @echo "I CXXFLAGS: $(CXXFLAGS)" - @echo "I LDFLAGS: $(LDFLAGS)" - @echo "I CC: $(shell $(CC) --version | head -n 1)" - @echo "I CXX: $(shell $(CXX) --version | head -n 1)" - @echo -endef - -## GET_OBJ_FILE replaces .c, .cpp, and .cu file endings with .o -define GET_OBJ_FILE - $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) -endef - -# -# Main function -# - -## -## clean -## - -.PHONY: clean -clean: - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - @echo "I cleaning llama.cpp" - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - make -C $(MK_DIR)/llama.cpp -j $(MK_FLAGS) clean - rm -f $(MK_DIR)/llama.cpp/ggml-metal-embed.metal - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - @echo "I cleaning llama-box" - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - $(call BUILD_INFO) - rm -rf $(MK_DIR)/build/bin - find $(MK_DIR)/llama-box -type f -name "*.o" -delete - rm -f $(MK_DIR)/llama-box/version.cpp - -## -## build -## - -llama.cpp/libllama.a: - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - @echo "I building llama.cpp" - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - make -C $(MK_DIR)/llama.cpp -j $(MK_FLAGS) libllama.a - -llama-box/version.cpp: $(wildcard .git/index) llama-box/scripts/version.sh - @sh $(MK_DIR)/llama-box/scripts/version.sh > $@.tmp - @if ! 
cmp -s $@ $@.tmp; then mv $@.tmp $@; else rm $@.tmp; fi - -llama-box/version.o: llama-box/version.cpp - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - -llama-box: llama-box/main.cpp llama-box/version.o llama.cpp/libllama.a - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - @echo "I building llama.cpp" - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - make -C $(MK_DIR)/llama.cpp -j $(MK_FLAGS) libllama.a - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" - @echo "I building llama-box" - @echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" -ifeq ($(_WIN32),1) -SUFFIX := .exe -endif - $(call BUILD_INFO) - mkdir -p $(MK_DIR)/build/bin - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(wildcard llama.cpp/*.o) $(wildcard llama.cpp/ggml-cuda/*.o) $(wildcard llama.cpp/ggml-cuda/template-instances/*.o) $(filter-out %.h %.hpp %.a $<,$^) $(call GET_OBJ_FILE, $<) -o $(MK_DIR)/build/bin/$@$(SUFFIX) $(LDFLAGS) $(LWINSOCK2) - -.PHONY: build -build: llama-box diff --git a/README.md b/README.md index 431760a..2f2db7a 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![ci status](https://github.com/thxcode/llama-box/actions/workflows/ci.yml/badge.svg)](https://github.com/thxcode/llama-box/actions/workflows/ci.yml)
-LLaMA box is a cleaning LLMs inference server rather +LLaMA box is a clean LLMs inference server rather than [llama-server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server). ## Usage diff --git a/llama-box/CMakeLists.txt b/llama-box/CMakeLists.txt new file mode 100644 index 0000000..131ba9e --- /dev/null +++ b/llama-box/CMakeLists.txt @@ -0,0 +1,53 @@ +# llama-box + +# +# version +# +if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + + # Is git submodule + if (NOT IS_DIRECTORY "${GIT_DIR}") + file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) + string(FIND "${REAL_GIT_DIR}" "." SLASH_POS) + if (SLASH_POS EQUAL 0) + set(GIT_DIR "${REAL_GIT_DIR}") + else () + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") + endif () + endif () + + if (EXISTS "${GIT_DIR}/index") + set(GIT_INDEX "${GIT_DIR}/index") + else () + set(GIT_INDEX "") + endif () +else () + set(GIT_INDEX "") +endif () +add_custom_command( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/version.cpp" + COMMENT "Generating build details from Git" + COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} + -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen-version-cpp.cmake" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/version.cpp.in" ${GIT_INDEX} + VERBATIM +) +set(TARGET version) +add_library(${TARGET} OBJECT version.cpp) + +# +# llama-box +# +set(TARGET llama-box) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +add_executable(${TARGET} main.cpp param.hpp ratelimiter.hpp utils.hpp) +target_link_libraries(${TARGET} PRIVATE version common ${CMAKE_THREAD_LIBS_INIT}) +target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}) +if (WIN32) + 
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32) +endif() +target_compile_features(${TARGET} PUBLIC cxx_std_11) diff --git a/llama-box/param.hpp b/llama-box/param.hpp index bdb97e7..d48fe64 100644 --- a/llama-box/param.hpp +++ b/llama-box/param.hpp @@ -11,7 +11,6 @@ #include "llama.cpp/llama.h" // version -extern const char *LLAMA_BOX_BUILD_DATE; extern const char *LLAMA_BOX_GIT_TREE_STATE; extern const char *LLAMA_BOX_GIT_VERSION; extern const char *LLAMA_BOX_GIT_COMMIT; diff --git a/llama-box/scripts/gen-version-cpp.cmake b/llama-box/scripts/gen-version-cpp.cmake new file mode 100644 index 0000000..a63b40c --- /dev/null +++ b/llama-box/scripts/gen-version-cpp.cmake @@ -0,0 +1,85 @@ +set(GIT_TREE_STATE "unknown") +set(GIT_COMMIT "unknown") +set(GIT_VERSION "unknown") + +# Look for git +find_package(Git) +if (NOT Git_FOUND) + find_program(GIT_EXECUTABLE NAMES git git.exe) + if (GIT_EXECUTABLE) + set(Git_FOUND TRUE) + endif () +endif () + +# Get the commit count and hash +if (Git_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} status --porcelain + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE TREE_STATE + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RES + ) + if (RES EQUAL 0) + if (TREE_STATE) + set(GIT_TREE_STATE "dirty") + else () + set(GIT_TREE_STATE "clean") + endif () + endif () + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE HEAD + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RES + ) + if (RES EQUAL 0) + set(GIT_COMMIT ${HEAD}) + endif () + if (DEFINED ENV{VERSION}) + set(GIT_VERSION $ENV{VERSION}) + else () + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RES + ) + if (RES EQUAL 0) + if (GIT_TREE_STATE STREQUAL "dirty") + set(GIT_VERSION "dev") + else() + set(GIT_VERSION ${VERSION}) + 
endif () + endif () + endif () +endif () + +message(STATUS "Git tree state: ${GIT_TREE_STATE}") +message(STATUS "Git commit: ${GIT_COMMIT}") +message(STATUS "Git version: ${GIT_VERSION}") + +set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/version.cpp.in") +set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/version.cpp") + +# Only write the version if it changed +if(EXISTS ${OUTPUT_FILE}) + file(READ ${OUTPUT_FILE} CONTENTS) + string(REGEX MATCH "LLAMA_BOX_GIT_TREE_STATE = \"([^\"]*)\";" _ ${CONTENTS}) + set(OLD_GIT_TREE_STATE ${CMAKE_MATCH_1}) + string(REGEX MATCH "LLAMA_BOX_GIT_COMMIT = \"([^\"]*)\";" _ ${CONTENTS}) + set(OLD_GIT_COMMIT ${CMAKE_MATCH_1}) + string(REGEX MATCH "LLAMA_BOX_GIT_VERSION = \"([^\"]*)\";" _ ${CONTENTS}) + set(OLD_GIT_VERSION ${CMAKE_MATCH_1}) + if ( + NOT OLD_GIT_TREE_STATE STREQUAL GIT_TREE_STATE OR + NOT OLD_GIT_COMMIT STREQUAL GIT_COMMIT OR + NOT OLD_GIT_VERSION STREQUAL GIT_VERSION + ) + configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) + endif() +else() + configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) +endif() diff --git a/llama-box/scripts/version.sh b/llama-box/scripts/version.sh deleted file mode 100755 index 556339d..0000000 --- a/llama-box/scripts/version.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/sh - -## -# Inspired by github.com/kubernetes/kubernetes/hack/lib/version.sh -## - -# ----------------------------------------------------------------------------- -# Version management helpers. These functions help to set the -# following variables: -# -# GIT_TREE_STATE - "clean" indicates no changes since the git commit id. -# "dirty" indicates source code changes after the git commit id. -# "unknown" indicates cannot find out the git tree. -# GIT_COMMIT - The git commit id corresponding to this -# source code. -# GIT_VERSION - "vX.Y" used to indicate the last release version, -# it can be specified via "VERSION". -# BUILD_DATE - The build date of the version. 
- -BUILD_DATE=$(date -u '+%Y-%m-%dT%H:%M:%SZ') -GIT_TREE_STATE="unknown" -GIT_COMMIT="unknown" -GIT_VERSION="unknown" - -# return directly if not found git client. -if [ -z "$(command -v git)" ]; then - # respect specified version. - GIT_VERSION=${VERSION:-${GIT_VERSION}} - return -fi - -# find out git info via git client. -if GIT_COMMIT=$(git rev-parse "HEAD^{commit}" 2>/dev/null); then - # specify as dirty if the tree is not clean. - if git_status=$(git status --porcelain 2>/dev/null) && [ -n "${git_status}" ]; then - GIT_TREE_STATE="dirty" - else - GIT_TREE_STATE="clean" - fi - - # specify with the tag if the head is tagged. - if GIT_VERSION="$(git rev-parse --abbrev-ref HEAD 2>/dev/null)"; then - if git_tag=$(git tag -l --contains HEAD 2>/dev/null | head -n 1 2>/dev/null) && [ -n "${git_tag}" ]; then - GIT_VERSION="${git_tag}" - fi - fi - - # specify to dev if the tree is dirty. - if [ "${GIT_TREE_STATE:-dirty}" = "dirty" ]; then - GIT_VERSION="dev" - fi - - # respect specified version - GIT_VERSION=${VERSION:-${GIT_VERSION}} -fi - -echo "char const *LLAMA_BOX_BUILD_DATE = \"${BUILD_DATE:-0}\";" -echo "char const *LLAMA_BOX_GIT_TREE_STATE = \"${GIT_TREE_STATE}\";" -echo "char const *LLAMA_BOX_GIT_COMMIT = \"${GIT_COMMIT}\";" -echo "char const *LLAMA_BOX_GIT_VERSION = \"${GIT_VERSION}\";" diff --git a/llama-box/version.cpp.in b/llama-box/version.cpp.in new file mode 100644 index 0000000..042c770 --- /dev/null +++ b/llama-box/version.cpp.in @@ -0,0 +1,3 @@ +char const *LLAMA_BOX_GIT_TREE_STATE = "@GIT_TREE_STATE@"; +char const *LLAMA_BOX_GIT_COMMIT = "@GIT_COMMIT@"; +char const *LLAMA_BOX_GIT_VERSION = "@GIT_VERSION@";