Merge remote-tracking branch 'origin/master' into sl/remove-task-type
slaren committed Jun 23, 2024
2 parents 486d061 + 6a2f298 commit 9b00a7e
Showing 47 changed files with 22,917 additions and 21,015 deletions.
1 change: 1 addition & 0 deletions .editorconfig
@@ -28,4 +28,5 @@ indent_size = 2
indent_style = tab

[examples/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset
2 changes: 1 addition & 1 deletion .github/workflows/server.yml
@@ -30,7 +30,7 @@ jobs:

strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
build_type: [RelWithDebInfo]
include:
- build_type: Release
109 changes: 71 additions & 38 deletions .gitignore
@@ -1,90 +1,123 @@
*.o
# Extensions

*.a
*.so
*.gguf
*.gguf.json
*.bat
*.bin
*.exe
*.dll
*.log
*.gcov
*.gcno
*.gcda
*.dot
*.bat
*.tmp
*.metallib
*.etag
*.exe
*.gcda
*.gcno
*.gcov
*.gguf
*.gguf.json
*.lastModified
.DS_Store
.build/
*.log
*.metallib
*.o
*.so
*.tmp

# IDE / OS

.cache/
.ccls-cache/
.direnv/
.DS_Store
.envrc
.idea/
.swiftpm
.venv
.clang-tidy
.vs/
.vscode/
.idea/
nppBackup

ggml-metal-embed.metal

lcov-report/
# Coverage

gcovr-report/
lcov-report/

# Build Artifacts

tags
.build/
build*
!build-info.cmake
!build-info.cpp.in
!build-info.sh
!build.zig
cmake-build-*
/libllama.so
/llama-*
android-ndk-*
arm_neon.h
cmake-build-*
CMakeSettings.json
compile_commands.json
ggml-metal-embed.metal
llama-batched-swift
out/
tmp/

# CI

!.github/workflows/*.yml

# Models

models/*
models-mnt
!models/.editorconfig
!models/ggml-vocab-*.gguf*

/Pipfile
/libllama.so
/llama-*
llama-batched-swift
/common/build-info.cpp
arm_neon.h
compile_commands.json
CMakeSettings.json

__pycache__
dist
# Zig

zig-out/
zig-cache/

# Logs

ppl-*.txt
qnt-*.txt
perf-*.txt

# Examples

examples/jeopardy/results.txt
examples/server/*.css.hpp
examples/server/*.html.hpp
examples/server/*.js.hpp
examples/server/*.mjs.hpp
examples/server/*.css.hpp
!build_64.sh
!examples/*.bat
!examples/*/*.kts
!examples/*/*/*.kts
!examples/sycl/*.bat
!examples/sycl/*.sh

# Python

__pycache__
.venv
/Pipfile
dist
poetry.lock
poetry.toml
nppBackup

# Test binaries
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-backend-ops
/tests/test-double-float
/tests/test-grad0
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-rope
/tests/test-sampling
/tests/test-tokenizer-0
/tests/test-tokenizer-1-spm
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
/tests/test-tokenizer-1-spm

# Scripts
!/scripts/install-oneapi.bat
7 changes: 4 additions & 3 deletions CMakeLists.txt
@@ -665,6 +665,7 @@ if (LLAMA_SYCL)
#todo: AOT

find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED)

message(STATUS "SYCL found")

@@ -679,11 +680,9 @@ if (LLAMA_SYCL)
endif()

add_compile_options(-I./) #include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR})

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
endif()
@@ -693,8 +692,10 @@ if (LLAMA_SYCL)
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")

if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
else()
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
31 changes: 23 additions & 8 deletions CMakePresets.json
@@ -11,9 +11,21 @@
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
}
},

{
"name": "sycl-base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build-${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_CXX_COMPILER": "icx",
"LLAMA_SYCL": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
}
},
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
{ "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },

{
@@ -35,15 +47,18 @@
},

{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "release" ] },
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "release", "static" ] },
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },

{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "release" ] },
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "release", "static" ] },
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },

{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },

{ "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
]
}
2 changes: 1 addition & 1 deletion Makefile
@@ -1051,7 +1051,7 @@ tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS)
tests/test-grammar-integration: tests/test-grammar-integration.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

30 changes: 19 additions & 11 deletions README-sycl.md
@@ -410,15 +410,9 @@

4. Install build tools

a. Download & install cmake for Windows: https://cmake.org/download/
a. Download & install cmake for Windows: https://cmake.org/download/ (CMake can also be installed from Visual Studio Installer)
b. The new Visual Studio will install Ninja as default. (If not, please install it manually: https://ninja-build.org/)

b. Download & install mingw-w64 make for Windows provided by w64devkit

- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).

- Extract `w64devkit` on your pc.

- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`).

### II. Build llama.cpp

@@ -428,10 +422,10 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
# Option 1: Use FP32 (recommended for better performance in most cases)
cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
# Option 2: Or FP16
cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
cmake --build build --config Release -j
```
@@ -441,9 +435,23 @@ Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former in
.\examples\sycl\win-build-sycl.bat
```

Or, use CMake presets to build:
```sh
cmake --preset x64-windows-sycl-release
cmake --build build-x64-windows-sycl-release -j --target llama-cli

cmake -DLLAMA_SYCL_F16=ON --preset x64-windows-sycl-release
cmake --build build-x64-windows-sycl-release -j --target llama-cli

cmake --preset x64-windows-sycl-debug
cmake --build build-x64-windows-sycl-debug -j --target llama-cli
```

Or, you can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.

*Notes:*

- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make llama-cli`.
- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`.

### III. Run the inference

16 changes: 12 additions & 4 deletions common/common.cpp
@@ -6,7 +6,6 @@
#include "llama.h"

#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <cmath>
#include <codecvt>
@@ -542,6 +541,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
/**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; }
else { invalid_param = true; }
return true;
}
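
The hunk above wires up a new `last` choice for the `--pooling` flag. For context, a minimal standalone sketch (hypothetical names, not llama.cpp's actual code) of what the pooling modes reduce per-token embeddings to, assuming `mean` averages all tokens, `cls` takes the first token, and the new `last` takes the final token:

```cpp
#include <cstdio>
#include <vector>

// Hypothetical mirror of the pooling modes, for illustration only.
enum pooling_type { POOL_NONE, POOL_MEAN, POOL_CLS, POOL_LAST };

// Reduce per-token embeddings (n_tokens x n_embd) to one sequence embedding.
static std::vector<float> pool(const std::vector<std::vector<float>> & toks, pooling_type pt) {
    switch (pt) {
        case POOL_CLS:  return toks.front(); // first (CLS) token
        case POOL_LAST: return toks.back();  // the newly added "last" mode
        case POOL_MEAN: {
            std::vector<float> out(toks[0].size(), 0.0f);
            for (const auto & t : toks) {
                for (size_t i = 0; i < out.size(); i++) {
                    out[i] += t[i] / toks.size();
                }
            }
            return out;
        }
        default: return {}; // POOL_NONE: caller keeps all per-token embeddings
    }
}

int main() {
    std::vector<std::vector<float>> toks = { {1.0f, 2.0f}, {3.0f, 4.0f} };
    for (float v : pool(toks, POOL_LAST)) {
        printf("%f\n", v); // prints the last token's embedding: 3.0, 4.0
    }
}
```
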
@@ -1870,6 +1870,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param

options.push_back({ "backend" });
options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });

if (llama_supports_mlock()) {
options.push_back({ "*", " --mlock", "force system to keep model in RAM rather than swapping or compressing" });
}
@@ -1988,8 +1989,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "cvector", " --completions-file FNAME",
"completions file (default: '%s')", params.cvector_completions_file.c_str() });
options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", params.n_completions });
options.push_back({ "cvector", " --batch-pca N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
options.push_back({ "cvector", " --iter-pca N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });

printf("usage: %s [options]\n", argv[0]);

@@ -2657,7 +2658,14 @@ static bool llama_download_file(const std::string & url, const std::string & pat
}

// Set the output file
std::unique_ptr<FILE, decltype(&fclose)> outfile(fopen(path_temporary.c_str(), "wb"), fclose);

struct FILE_deleter {
void operator()(FILE * f) const {
fclose(f);
}
};

std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
if (!outfile) {
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
return false;
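
The hunk above replaces `std::unique_ptr<FILE, decltype(&fclose)>` with a small stateless deleter type. A minimal sketch of the pattern (file name hypothetical): an empty function object needs no deleter argument at construction and, unlike a stored function pointer, typically keeps the `unique_ptr` pointer-sized.

```cpp
#include <cstdio>
#include <memory>

// Stateless deleter: no function pointer is stored, so the unique_ptr is
// typically pointer-sized and no deleter has to be passed at construction.
struct FILE_deleter {
    void operator()(FILE * f) const { fclose(f); }
};

int main() {
    std::unique_ptr<FILE, FILE_deleter> out(fopen("example.tmp", "wb")); // hypothetical path
    if (!out) {
        return 1;
    }
    fputs("hello\n", out.get());
    return 0; // fclose runs automatically when `out` goes out of scope
}
```
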
4 changes: 2 additions & 2 deletions convert-hf-to-gguf-update.py
@@ -214,15 +214,15 @@ def get_vocab_base_pre(self, tokenizer) -> str:
"""

convert_py_pth = pathlib.Path("convert-hf-to-gguf.py")
convert_py = convert_py_pth.read_text()
convert_py = convert_py_pth.read_text(encoding="utf-8")
convert_py = re.sub(
r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
lambda m: m.group(1) + src_func + m.group(3),
convert_py,
flags=re.DOTALL | re.MULTILINE,
)

convert_py_pth.write_text(convert_py)
convert_py_pth.write_text(convert_py, encoding="utf-8")

logger.info("+++ convert-hf-to-gguf.py was updated")

