Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

b3341 #216

Merged
merged 14 commits into from
Jul 8, 2024
Merged

b3341 #216

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ let
ps.tiktoken
ps.torchWithoutCuda
ps.transformers

# server bench
ps.matplotlib

# server tests
ps.openai
ps.behave
ps.prometheus-client

# for examples/pydantic-models-to-grammar-examples.py
ps.docstring-parser
ps.pydantic

# for scripts/compare-llama-bench.py
ps.gitpython
ps.tabulate
]
);

Expand Down
38 changes: 38 additions & 0 deletions .github/workflows/python-type-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Workflow that type-checks the repository's Python code with Pyright.
name: Python Type-Check

# Run only when this workflow file, a Python source file, or a
# requirements file is touched by a push or a pull request.
on:
push:
paths:
- '.github/workflows/python-type-check.yml'
- '**.py'
- '**/requirements*.txt'
pull_request:
paths:
- '.github/workflows/python-type-check.yml'
- '**.py'
- '**/requirements*.txt'

# The group key ends in github.ref when github.head_ref is non-empty and in the
# per-run github.run_id otherwise, so only runs that share a ref are grouped
# together and cancel one another.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true

jobs:
python-type-check:
runs-on: ubuntu-latest
name: pyright type-check
steps:
- name: Check out source repository
uses: actions/checkout@v4
- name: Set up Python environment
uses: actions/setup-python@v5
with:
python-version: "3.11"
# Dependencies are installed straight into the runner's Python (no virtual
# environment yet, see the TODO below) so Pyright can resolve imports.
- name: Install Python dependencies
# TODO: use a venv
run: pip install -r requirements/requirements-all.txt
# Pyright is pinned to an exact version.
# NOTE(review): `level: warning` / `warnings: true` presumably make warnings
# reported and treated as failures — confirm against the pyright-action docs.
- name: Type-check with Pyright
uses: jakebailey/pyright-action@v2
with:
version: 1.1.370
level: warning
warnings: true
18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ BUILD_TARGETS = \
llama-finetune \
llama-gbnf-validator \
llama-gguf \
llama-gguf-hash \
llama-gguf-split \
llama-gritlm \
llama-imatrix \
Expand Down Expand Up @@ -1178,6 +1179,23 @@ llama-gguf: examples/gguf/gguf.cpp \
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

# Object files for the hash implementations under examples/gguf-hash/deps
# (sha1, xxhash, sha256). Each is built from its single .c source with $(CC),
# with the deps directory added to the include path.
examples/gguf-hash/deps/sha1/sha1.o: \
examples/gguf-hash/deps/sha1/sha1.c
$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@

examples/gguf-hash/deps/xxhash/xxhash.o: \
examples/gguf-hash/deps/xxhash/xxhash.c
$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@

examples/gguf-hash/deps/sha256/sha256.o: \
examples/gguf-hash/deps/sha256/sha256.c
$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@

# llama-gguf-hash: compile gguf-hash.cpp (the first prerequisite, $<) to its
# object file, then link that object with every other non-header prerequisite:
# the three hash objects above plus $(OBJ_ALL). The filter-out drops headers
# and the .cpp itself from the link line.
llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

llama-gguf-split: examples/gguf-split/gguf-split.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ Typically finetunes of the base models below are supported as well.
- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
- [x] [OLMo](https://allenai.org/olmo)
- [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b)

(instructions for supporting more models: [HOWTO-add-model.md](./docs/HOWTO-add-model.md))

Expand Down Expand Up @@ -131,6 +132,7 @@ Typically finetunes of the base models below are supported as well.
- Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig)
- Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart)
- PHP (API bindings and features built on top of llama.cpp): [distantmagic/resonance](https://github.com/distantmagic/resonance) [(more info)](https://github.com/ggerganov/llama.cpp/pull/6326)
- Guile Scheme: [guile_llama_cpp](https://savannah.nongnu.org/projects/guile-llama-cpp)

**UI:**

Expand Down
20 changes: 20 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ function gg_run_ctest_debug {

set -e

# Check cmake, make and ctest are installed
gg_check_build_requirements

(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

Expand Down Expand Up @@ -131,6 +134,9 @@ function gg_run_ctest_release {

set -e

# Check cmake, make and ctest are installed
gg_check_build_requirements

(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

Expand Down Expand Up @@ -701,6 +707,20 @@ function gg_run_embd_bge_small {
set +e
}

# Report every missing build tool (cmake, make, ctest) through gg_printf.
#
# The function only reports: it contains no exit/return of its own, so its
# status is that of the last check (or of the last gg_printf call).
# gg_printf is called with a printf-style format, as in the other call sites
# in this script.
function gg_check_build_requirements {
    local tool

    for tool in cmake make ctest; do
        if ! command -v "${tool}" &> /dev/null; then
            # End each message with a newline so that several missing tools
            # are reported on separate lines instead of running together.
            gg_printf '%s not found, please install\n' "${tool}"
        fi
    done
}

function gg_sum_embd_bge_small {
gg_printf '### %s\n\n' "${ci}"

Expand Down
2 changes: 1 addition & 1 deletion common/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
buf << "[ ";

bool first = true;
for (const auto &token : tokens)
for (const auto & token : tokens)
{
if (!first) {
buf << ", ";
Expand Down
5 changes: 3 additions & 2 deletions common/sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
GGML_ASSERT(!original_logits.empty());
}
llama_token id = 0;
// Get a pointer to the logits
float * logits = llama_get_logits_ith(ctx_main, idx);

if (temp < 0.0) {
// greedy sampling, with probs
Expand Down Expand Up @@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
}

if (ctx_sampling->grammar != NULL && !is_resampling) {
// Get a pointer to the logits
float * logits = llama_get_logits_ith(ctx_main, idx);

// Create an array with a single token data element for the sampled id
llama_token_data single_token_data = {id, logits[id], 0.0f};
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
Expand Down
Loading
Loading