Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mofosyne authored May 10, 2024
2 parents 849cb13 + 8c570c9 commit 2cb9174
Show file tree
Hide file tree
Showing 275 changed files with 76,222 additions and 55,701 deletions.
4 changes: 3 additions & 1 deletion .devops/full-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev

COPY requirements.txt requirements.txt
COPY requirements requirements
Expand All @@ -28,6 +28,8 @@ COPY . .
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make

Expand Down
5 changes: 5 additions & 0 deletions .devops/full-rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]
5 changes: 4 additions & 1 deletion .devops/full.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip git
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev

COPY requirements.txt requirements.txt
COPY requirements requirements
Expand All @@ -15,6 +15,9 @@ WORKDIR /app

COPY . .

ENV LLAMA_CURL=1


RUN make

ENV LC_ALL=C.utf8
Expand Down
8 changes: 3 additions & 5 deletions .devops/main-intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@ WORKDIR /app

COPY . .

RUN mkdir build && \
cd build && \
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build . --config Release --target main
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build build --config Release --target main

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

Expand Down
6 changes: 2 additions & 4 deletions .devops/main-vulkan.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
# Build it
WORKDIR /app
COPY . .
RUN mkdir build && \
cd build && \
cmake .. -DLLAMA_VULKAN=1 && \
cmake --build . --config Release --target main
RUN cmake -B build -DLLAMA_VULKAN=1 && \
cmake --build build --config Release --target main

# Clean up
WORKDIR /
Expand Down
7 changes: 6 additions & 1 deletion .devops/server-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
apt-get install -y build-essential git
apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

Expand All @@ -22,11 +22,16 @@ COPY . .
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV LLAMA_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

COPY --from=build /app/server /server

ENTRYPOINT [ "/server" ]
13 changes: 7 additions & 6 deletions .devops/server-intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@ FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
apt-get install -y git
apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN mkdir build && \
cd build && \
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build . --config Release --target server
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
cmake --build build --config Release --target server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

COPY --from=build /app/build/bin/server /server

ENV LC_ALL=C.utf8
Expand Down
5 changes: 5 additions & 0 deletions .devops/server-rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

RUN make

ENTRYPOINT [ "/app/server" ]
10 changes: 6 additions & 4 deletions .devops/server-vulkan.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
apt update -y && \
apt-get install -y vulkan-sdk

# Install cURL
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

# Build it
WORKDIR /app
COPY . .
RUN mkdir build && \
cd build && \
cmake .. -DLLAMA_VULKAN=1 && \
cmake --build . --config Release --target server
RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
cmake --build build --config Release --target server

# Clean up
WORKDIR /
Expand Down
7 changes: 6 additions & 1 deletion .devops/server.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,21 @@ ARG UBUNTU_VERSION=22.04
FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
apt-get install -y build-essential git
apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

COPY --from=build /app/server /server

ENV LC_ALL=C.utf8
Expand Down
16 changes: 15 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
[flake8]
max-line-length = 125
ignore = W503
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
# Do not traverse examples
examples,
# Do not include package initializers
__init__.py,
# No need to traverse our git directory
.git,
# There's no value in checking cache directories
__pycache__,
# No need to include the build path
build,
# This contains builds that we don't want to check
dist # This is generated with `python build .` for package releases
# max-complexity = 10
81 changes: 56 additions & 25 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ on:
push:
branches:
- master
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
schedule:
- cron: '04 2 * * *'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
cancel-in-progress: true

jobs:
Expand All @@ -42,11 +42,33 @@ jobs:
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
N_USERS: 8
DURATION: 10m
if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}

strategy:
matrix:
model: [phi-2]
ftype: [q4_0, q8_0, f16]
include:
- model: phi-2
ftype: q4_0
pr_comment_enabled: "true"

if: |
inputs.gpu-series == 'Standard_NC4as_T4_v3'
|| (
github.event_name == 'schedule'
&& github.ref_name == 'master'
&& github.repository_owner == 'ggerganov'
)
|| github.event_name == 'pull_request_target'
|| (
github.event_name == 'push'
&& github.event.ref == 'refs/heads/master'
&& github.repository_owner == 'ggerganov'
)
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
Expand All @@ -69,20 +91,24 @@ jobs:
sleep 0.1
done
- name: Install k6
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.21'

- name: Install k6 and xk6-sse
id: k6_installation
run: |
cd examples/server/bench
wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
tar xzf k6*.tar.gz --strip-components=1
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
--with github.com/phymbert/xk6-sse
- name: Build
id: cmake_build
run: |
set -eux
mkdir build
cd build
cmake .. \
cmake -B build \
-DLLAMA_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
Expand All @@ -93,7 +119,7 @@ jobs:
-DLLAMA_FATAL_WARNINGS=OFF \
-DLLAMA_ALL_WARNINGS=OFF \
-DCMAKE_BUILD_TYPE=Release;
cmake --build . --config Release -j $(nproc) --target server
cmake --build build --config Release -j $(nproc) --target server
- name: Download the dataset
id: download_dataset
Expand All @@ -108,15 +134,15 @@ jobs:
cd examples/server/bench
source venv/bin/activate
BENCH_K6_BIN_PATH=./k6 python bench.py \
python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
--name ${{ github.job }} \
--branch ${{ github.head_ref || github.ref_name }} \
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
--scenario script.js \
--duration ${{ github.event.inputs.duration || env.DURATION }} \
--hf-repo ggml-org/models \
--hf-file phi-2/ggml-model-q4_0.gguf \
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
--model-path-prefix /models \
--parallel ${{ env.N_USERS }} \
-ngl 33 \
Expand All @@ -134,7 +160,7 @@ jobs:
- uses: actions/upload-artifact@v4
with:
name: benchmark-results
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
compression-level: 9
path: |
examples/server/bench/*.jpg
Expand All @@ -146,7 +172,7 @@ jobs:
with:
authToken: ${{secrets.GITHUB_TOKEN}}
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
context: bench-server-baseline
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
description: |
${{ env.BENCH_RESULTS }}
state: 'success'
Expand Down Expand Up @@ -203,21 +229,26 @@ jobs:
- name: Comment PR
uses: mshick/add-pr-comment@v2
id: comment_pr
if: ${{ github.event.pull_request != '' }}
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
with:
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
message: |
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
<p align="center">
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
- ${{ env.BENCH_GRAPH_XLABEL }}
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
</p>
<details>
<summary>Time series</summary>
<summary>Expand details for performance related PR only</summary>
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- ${{ env.BENCH_GRAPH_XLABEL }}
<p align="center">
Expand Down
Loading

0 comments on commit 2cb9174

Please sign in to comment.