From 2b5136e1c2e4a8730ba88193f0e6f76360748cb2 Mon Sep 17 00:00:00 2001 From: cebtenzzre Date: Thu, 2 Nov 2023 12:42:36 -0400 Subject: [PATCH 1/5] cmake : fix joining of REAL_GIT_DIR --- common/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index ac594b2ca84ea..09575e6a1a477 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -11,7 +11,12 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git") if(NOT IS_DIRECTORY "${GIT_DIR}") file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) - set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") + string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS) + if (SLASH_POS EQUAL 0) + set(GIT_DIR "${REAL_GIT_DIR}") + else() + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") + endif() endif() set(GIT_INDEX "${GIT_DIR}/index") From 635e9fadfd516d4604a0fecf4a854bfb25ad17ae Mon Sep 17 00:00:00 2001 From: cebtenzzre Date: Wed, 1 Nov 2023 13:09:21 -0400 Subject: [PATCH 2/5] fix includes with help from include-what-you-use --- common/common.cpp | 18 ++++++++----- common/common.h | 11 ++++---- common/console.cpp | 11 ++++---- common/grammar-parser.cpp | 1 - common/grammar-parser.h | 7 +++-- common/sampling.cpp | 6 +++++ common/sampling.h | 8 +++--- common/train.cpp | 14 ++++++++-- common/train.h | 9 ++++--- examples/baby-llama/baby-llama.cpp | 6 +++-- examples/batched-bench/batched-bench.cpp | 5 ++-- examples/batched/batched.cpp | 4 ++- examples/beam-search/beam-search.cpp | 19 ++----------- examples/benchmark/benchmark-matmult.cpp | 15 +++-------- .../convert-llama2c-to-ggml.cpp | 19 +++++++------ examples/embedding/embedding.cpp | 5 ++++ examples/export-lora/export-lora.cpp | 9 +++++-- examples/finetune/finetune.cpp | 20 +++++++------- examples/infill/infill.cpp | 12 ++++----- examples/llama-bench/llama-bench.cpp | 14 ++++++---- examples/llava/clip.cpp | 16 ++++++----- examples/llava/clip.h | 3 ++- examples/llava/llava.cpp | 7 +++-- examples/main/main.cpp | 11 ++++---- examples/parallel/parallel.cpp | 11 ++++++-- examples/perplexity/perplexity.cpp | 9 ++++++- examples/quantize-stats/quantize-stats.cpp | 9 ++++--- examples/quantize/quantize.cpp | 6 ++++- examples/save-load-state/save-load-state.cpp | 6 +++-- examples/server/server.cpp | 27 ++++++++++++++++--- examples/simple/simple.cpp | 3 +-- examples/speculative/speculative.cpp | 6 ++++- .../train-text-from-scratch.cpp | 21 +++++++-------- ggml-alloc.c | 3 ++- ggml-alloc.h | 3 +++ ggml-backend.c | 3 --- ggml-backend.h | 3 +++ ggml-impl.h | 4 +-- ggml-quants.c | 5 ++-- ggml-quants.h | 6 ++--- ggml.c | 10 +++---- ggml.h | 1 - llama.cpp | 9 +++++-- llama.h | 5 +++- pocs/vdot/q8dot.cpp | 19 ++++++------- pocs/vdot/vdot.cpp | 19 ++++++------- tests/test-grad0.cpp | 3 ++- tests/test-grammar-parser.cpp | 8 +++++- tests/test-llama-grammar.cpp | 6 +++++ tests/test-quantize-fns.cpp | 7 +++-- tests/test-quantize-perf.cpp | 8 +++--- tests/test-rope.cpp | 3 ++- tests/test-sampling.cpp | 8 +----- tests/test-tokenizer-0-falcon.cpp | 8 +++--- tests/test-tokenizer-0-llama.cpp | 8 +++--- tests/test-tokenizer-1-bpe.cpp | 12 ++++----- tests/test-tokenizer-1-llama.cpp | 11 +++----- 57 files changed, 298 insertions(+), 222 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index e938dee165d9d..35b22de0feb73 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1,20 +1,28 @@ #include "common.h" +#include "ggml.h" #include "llama.h" +#include "log.h" +#include "sampling.h" #include -#include +#include +#include +#include #include -#include +#include #include +#include #include #include -#include #include #include +#include #include +#include +#include #include +#include #include -#include #if defined(__APPLE__) && defined(__MACH__) #include @@ -32,9 +40,7 @@ #include #include #else -#include #include -#include #endif #if defined(_MSC_VER) diff --git a/common/common.h b/common/common.h index 72a49b8901f26..c763be8b06e0a 100644 --- a/common/common.h +++ b/common/common.h @@ -3,19 +3,18 @@ #pragma once #include "llama.h" - #include "sampling.h" #define LOG_NO_FILE_LINE_FUNCTION #include "log.h" #include -#include -#include +#include +#include #include -#include -#include +#include #include +#include #ifdef _WIN32 #define DIRECTORY_SEPARATOR '\\' @@ -68,7 +67,7 @@ struct gpt_params { int32_t yarn_orig_ctx = 0; // YaRN original context length int8_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED; - // // sampling parameters + // sampling parameters struct llama_sampling_params sparams; std::string model = "models/7B/ggml-model-f16.gguf"; // model path diff --git a/common/console.cpp b/common/console.cpp index f65cbc6eda0b1..69f1419f18ec5 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -14,14 +14,13 @@ #define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004 #endif #else -#include +#include +#include +#include + #include -#include -#include -#include -#include -#include #include +#include #endif #define ANSI_COLOR_RED "\x1b[31m" diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index ff51cc8034c8b..ef56e909dd232 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -1,6 +1,5 @@ #include "grammar-parser.h" #include -#include #include #include #include diff --git a/common/grammar-parser.h b/common/grammar-parser.h index 9037d72728a42..b603764b398bd 100644 --- a/common/grammar-parser.h +++ b/common/grammar-parser.h @@ -10,11 +10,14 @@ // space ::= [ \t\n]* #pragma once + #include "llama.h" -#include -#include + #include +#include +#include #include +#include namespace grammar_parser { struct parse_state { diff --git a/common/sampling.cpp b/common/sampling.cpp index 1317024c2c11c..a5b684ee31571 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -1,5 +1,11 @@ +#include "common.h" #include "sampling.h" +#include +#include +#include +#include + struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) { struct llama_sampling_context * result = new llama_sampling_context(); diff --git a/common/sampling.h b/common/sampling.h index 7c9b8dcf23bcb..03909efbc9c3d 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -1,12 +1,12 @@ #pragma once -#include "llama.h" - #include "grammar-parser.h" +#include "llama.h" +#include #include -#include #include +#include // sampling parameters typedef struct llama_sampling_params { @@ -56,8 +56,6 @@ struct llama_sampling_context { std::vector cur; }; -#include "common.h" - // Create a new sampling context instance. struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params); diff --git a/common/train.cpp b/common/train.cpp index bc15b7a03c0cd..d1f5505cb23db 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -1,9 +1,19 @@ -#include "train.h" #include "common.h" +#include "ggml.h" +#include "llama.h" +#include "train.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include struct random_normal_distribution { std::mt19937 gen; diff --git a/common/train.h b/common/train.h index d86c93cc4f147..ccac6b7d6bdcc 100644 --- a/common/train.h +++ b/common/train.h @@ -2,13 +2,14 @@ #pragma once -#include +#include "llama.h" + +#include +#include #include +#include #include -#include "ggml.h" -#include "llama.h" - typedef std::string mt19937_state; struct train_state { diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 8155101d0ab93..35404f9a3aead 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -1,11 +1,13 @@ #include "ggml.h" #include "train.h" -#include +#include #include +#include +#include +#include #include #include -#include #include #if defined(_MSC_VER) diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 533c55c17aad1..955d815cc39bf 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -1,10 +1,11 @@ #include "common.h" +#include "ggml.h" #include "llama.h" #include -#include +#include #include -#include +#include #include // mutates the input string diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp index 22a4265df77c0..f918727983c3d 100644 --- a/examples/batched/batched.cpp +++ b/examples/batched/batched.cpp @@ -1,9 +1,11 @@ #include "common.h" +#include "ggml.h" #include "llama.h" #include -#include +#include #include +#include #include #include diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp index 679b382e19b4e..cce2b9916ba7f 100644 --- a/examples/beam-search/beam-search.cpp +++ b/examples/beam-search/beam-search.cpp @@ -1,29 +1,14 @@ #include "common.h" #include "llama.h" +#include #include -#include -#include #include -#include -#include -#include #include #include +#include #include -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) -#include -#include -#elif defined (_WIN32) -#define WIN32_LEAN_AND_MEAN -#ifndef NOMINMAX -# define NOMINMAX -#endif -#include -#include -#endif - // Used for debugging to print out beam tokens. struct ostream_beam_view { llama_context * ctx; diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 76e3f57ccce8e..58bf9a814a579 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -1,20 +1,11 @@ #include "common.h" #include "ggml.h" -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include +#include +#include #include -#include -#include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp index cae3bf3c3dc65..7ecc15cb5791a 100644 --- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp @@ -1,19 +1,22 @@ +#include "common.h" #include "ggml.h" #include "llama.h" -#include "common.h" -#include -#include +#include #include +#include #include -#include +#include #include -#include -#include -#include +#include +#include +#include +#include #include -#include #include +#include +#include +#include // GGUF keys & tensor names. diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 3295cd2400ac3..86f874a526e0a 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -1,7 +1,12 @@ #include "common.h" #include "llama.h" +#include +#include #include +#include +#include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index d803cfd5cb2d5..a2406de5ab28b 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -1,11 +1,16 @@ - #include "common.h" #include "ggml.h" #include "ggml-alloc.h" -#include +#include +#include +#include +#include +#include +#include #include #include +#include static const size_t tensor_alignment = 32; diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index 649a3b7c1941e..710ddba96051d 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -1,19 +1,19 @@ -#include "ggml.h" +#include "common.h" #include "ggml-alloc.h" +#include "ggml.h" #include "llama.h" -#include "common.h" #include "train.h" -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include #include -#include #include -#include -#include -#include #include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 62f5ce3c16a32..f9caffacb7a4f 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -1,19 +1,17 @@ #include "common.h" - #include "console.h" #include "llama.h" -#include "grammar-parser.h" +#include "sampling.h" -#include -#include -#include +#include #include -#include +#include #include #include -#include +#include #include #include +#include #include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 9bd82d565834a..0df6c4c45757f 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -1,26 +1,30 @@ +#include "ggml.h" +#include "llama.h" +#include "common.h" +#include "ggml-cuda.h" + #include #include #include +#include #include #include #include #include #include +#include #include #include #include #include +#include #include #include #include #include +#include #include -#include "ggml.h" -#include "llama.h" -#include "common.h" -#include "ggml-cuda.h" - // utils static uint64_t get_time_ns() { using clock = std::chrono::high_resolution_clock; diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 61932e659543c..03a8e9c463ef2 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -2,21 +2,23 @@ // so there might be still unnecessary artifacts hanging around // I'll gradually clean and extend it -#include +#include "clip.h" +#include "ggml-alloc.h" +#include "ggml.h" + +#include +#include #include +#include +#include #include #include #include -#include -#include #include #include +#include #include -#include "clip.h" -#include "ggml.h" -#include "ggml-alloc.h" - #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" diff --git a/examples/llava/clip.h b/examples/llava/clip.h index 3d7261e299a35..106df0d1a6225 100644 --- a/examples/llava/clip.h +++ b/examples/llava/clip.h @@ -1,7 +1,8 @@ #ifndef CLIP_H #define CLIP_H -#include "ggml.h" +#include +#include struct clip_ctx; diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index f0974d5bcf452..2c5a863988f73 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -1,11 +1,14 @@ #include "clip.h" -#include "llava-utils.h" #include "common.h" +#include "ggml.h" #include "llama.h" +#include "llava-utils.h" +#include #include #include -#include +#include +#include static void show_additional_info(int /*argc*/, char ** argv) { printf("\n example usage: %s -m --mmproj --image [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 8d985c82ac21a..ce68efae44af9 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -1,18 +1,17 @@ #include "common.h" - #include "console.h" #include "llama.h" +#include "sampling.h" -#include -#include -#include +#include #include -#include +#include #include #include -#include +#include #include #include +#include #include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index a78df305f415c..9b107e5f1aba3 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -2,13 +2,20 @@ // The clients submite requests to the server and they are processed in parallel. #include "common.h" +#include "ggml.h" #include "llama.h" +#include "sampling.h" -#include +#include +#include +#include #include +#include +#include +#include #include +#include #include -#include // trim whitespace from the beginning and end of a string static std::string trim(const std::string & str) { diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index de60c5227f7c1..f8e8f8fc5c442 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -1,13 +1,20 @@ #include "common.h" #include "llama.h" +#include +#include #include #include #include #include +#include +#include +#include #include +#include #include -#include +#include +#include #include #if defined(_MSC_VER) diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 2712824774ae7..d5c3c35919821 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -8,15 +8,16 @@ #include #include #include +#include #include -#include +#include +#include #include #include #include -#include -#include #include -#include +#include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index d27ea5e9132fd..ce6863a2b3169 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -1,10 +1,14 @@ #include "common.h" #include "llama.h" +#include +#include #include +#include #include -#include +#include #include +#include struct quant_option { std::string name; diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 48d80111010df..622101449a53a 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -1,9 +1,11 @@ #include "common.h" #include "llama.h" -#include +#include #include -#include +#include +#include +#include int main(int argc, char ** argv) { gpt_params params; diff --git a/examples/server/server.cpp b/examples/server/server.cpp index fd755327a511d..0da14c9b70517 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1,6 +1,7 @@ #include "common.h" +#include "ggml.h" #include "llama.h" -#include "grammar-parser.h" +#include "sampling.h" #include "../llava/clip.h" @@ -20,10 +21,28 @@ #include "completion.js.hpp" #include "json-schema-to-grammar.mjs.hpp" -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifndef SERVER_VERBOSE #define SERVER_VERBOSE 1 diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index 374aef6f16189..52b0de48f5e7f 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -1,9 +1,8 @@ #include "common.h" +#include "ggml.h" #include "llama.h" -#include #include -#include #include int main(int argc, char ** argv) { diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 798684f66678e..3e65d7e6e2872 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -1,9 +1,13 @@ #include "common.h" +#include "ggml.h" #include "llama.h" +#include "sampling.h" -#include +#include #include +#include #include +#include #include #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 100 diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 2a257e63215e3..42e5c6b0fbc62 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -1,19 +1,18 @@ -#include "ggml.h" -#include "ggml-alloc.h" #include "common.h" -#include "train.h" +#include "ggml-alloc.h" +#include "ggml.h" #include "llama.h" -#include -#include -#include -#include +#include "train.h" + +#include +#include +#include +#include +#include #include -#include #include -#include -#include -#include #include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/ggml-alloc.c b/ggml-alloc.c index 34eba3f830e84..c9fd6e54d91b1 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -1,8 +1,9 @@ #include "ggml-alloc.h" #include "ggml-backend.h" #include "ggml.h" + #include -#include +#include #include #include #include diff --git a/ggml-alloc.h b/ggml-alloc.h index e38758878b91a..4ebba6a6b3d57 100644 --- a/ggml-alloc.h +++ b/ggml-alloc.h @@ -2,6 +2,9 @@ #include "ggml.h" +#include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/ggml-backend.c b/ggml-backend.c index ca8d83dafe47c..89af304d3cb07 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -1,8 +1,5 @@ #include "ggml-backend.h" -#include "ggml-alloc.h" -#include -#include #include #include #include diff --git a/ggml-backend.h b/ggml-backend.h index da134b0dbed51..12618036bdb10 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -2,6 +2,9 @@ #include "ggml.h" +#include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/ggml-impl.h b/ggml-impl.h index 5ec18a50c8da5..8a9fb73884c82 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -1,9 +1,9 @@ #pragma once -#include "ggml.h" - // GGML internal header +#include "ggml.h" + #include #include #include diff --git a/ggml-quants.c b/ggml-quants.c index 740be6dc5c798..39f2c27b3c3c6 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1,10 +1,11 @@ #include "ggml-quants.h" #include "ggml-impl.h" -#include -#include #include #include +#include +#include +#include #ifdef __ARM_NEON diff --git a/ggml-quants.h b/ggml-quants.h index 70c12c27465e8..f782d54c8e6d9 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -1,11 +1,11 @@ #pragma once -#include "ggml-impl.h" - // GGML internal header +#include "ggml.h" + +#include #include -#include #define QK4_0 32 typedef struct { diff --git a/ggml.c b/ggml.c index 605a27940fc81..fca45ad14d5d3 100644 --- a/ggml.c +++ b/ggml.c @@ -1,6 +1,7 @@ #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows #define _USE_MATH_DEFINES // For M_PI on MSVC +#include "ggml.h" #include "ggml-impl.h" #include "ggml-quants.h" @@ -20,9 +21,7 @@ #include #include #include -#include #include -#include #ifdef GGML_USE_METAL #include @@ -85,15 +84,14 @@ static int sched_yield (void) { return 0; } #else + #include +#include #include +#include typedef void * thread_ret_t; -#include -#include -#include - #endif #ifdef GGML_USE_CPU_HBM diff --git a/ggml.h b/ggml.h index 70eb25a6bf3af..5b27b7ad207fd 100644 --- a/ggml.h +++ b/ggml.h @@ -300,7 +300,6 @@ extern "C" { GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n); GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n); - struct ggml_object; struct ggml_context; enum ggml_type { diff --git a/llama.cpp b/llama.cpp index bb60044b4707f..93cdc98247570 100644 --- a/llama.cpp +++ b/llama.cpp @@ -52,29 +52,34 @@ #include #include #include +#include #include #include #include #include -#include #include #include +#include #include #include +#include #include #include #include #include +#include +#include #include #include #include #include #include #include -#include #include #include +#include #include +#include #include #if defined(_MSC_VER) diff --git a/llama.h b/llama.h index 3f1becd761688..dd16407c81d2c 100644 --- a/llama.h +++ b/llama.h @@ -2,12 +2,14 @@ #define LLAMA_H #include "ggml.h" + #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES #else #define LLAMA_MAX_DEVICES 1 #endif // GGML_USE_CUBLAS + #include #include #include @@ -759,8 +761,9 @@ extern "C" { // Internal API to be implemented by llama.cpp and used by tests/benchmarks only #ifdef LLAMA_API_INTERNAL -#include #include +#include +#include struct ggml_tensor; diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp index 111770d5519cb..05e85ee300f1c 100644 --- a/pocs/vdot/q8dot.cpp +++ b/pocs/vdot/q8dot.cpp @@ -1,16 +1,13 @@ -#include -#include -#include -#include +#include "ggml.h" + +#include #include -#include #include -#include -#include -#include -#include - -#include +#include +#include +#include +#include +#include constexpr int kVecSize = 1 << 16; diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index e96372c4b7107..5b0ebb7067e08 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -1,14 +1,15 @@ -#include -#include -#include +#include "ggml.h" + +#include +#include #include -#include #include -#include -#include -#include - -#include +#include +#include +#include +#include +#include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp index 0a559b27ab370..49a92ed23481b 100644 --- a/tests/test-grad0.cpp +++ b/tests/test-grad0.cpp @@ -1,10 +1,11 @@ #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows #include "ggml.h" +#include #include +#include #include #include -#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index a0b5b043df868..b4d825b6271e2 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -2,10 +2,16 @@ #undef NDEBUG #endif -#include "llama.h" #include "grammar-parser.h" +#include "llama.h" #include +#include +#include +#include +#include +#include +#include int main() { diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index 73dd33dd286a5..cbbe95bd3e6a1 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -6,6 +6,12 @@ #include "grammar-parser.h" #include +#include +#include +#include +#include +#include +#include int main() { diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index a2459a2867c5c..3b88b49890bd4 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -2,10 +2,9 @@ #include "ggml.h" -#undef NDEBUG -#include -#include -#include +#include +#include +#include #include #include diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index 88fac0e23106b..b4260e54882ea 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -2,14 +2,12 @@ #include "ggml.h" -#undef NDEBUG #include -#include +#include +#include +#include #include -#include -#include #include -#include #include #include diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp index 26c1f42dc0e95..2fda30ccba531 100644 --- a/tests/test-rope.cpp +++ b/tests/test-rope.cpp @@ -1,9 +1,10 @@ #include "ggml.h" +#include #include +#include #include #include -#include #include #if defined(_MSC_VER) diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index 32e58941c0ee0..63ae06d309452 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -1,15 +1,9 @@ #include "ggml.h" #include "llama.h" -#ifdef NDEBUG -#undef NDEBUG -#endif - #include -#include -#include +#include #include -#include static void dump(const llama_token_data_array * candidates) { for (size_t i = 0; i < candidates->size; i++) { diff --git a/tests/test-tokenizer-0-falcon.cpp b/tests/test-tokenizer-0-falcon.cpp index a4e9d2b912728..d2f983baa5402 100644 --- a/tests/test-tokenizer-0-falcon.cpp +++ b/tests/test-tokenizer-0-falcon.cpp @@ -1,12 +1,14 @@ -#include "llama.h" #include "common.h" #include "console.h" +#include "llama.h" #include -#include +#include +#include #include +#include +#include #include -#include // generate using test-tokenizer-0-falcon.py static const std::map> & k_tests() { diff --git a/tests/test-tokenizer-0-llama.cpp b/tests/test-tokenizer-0-llama.cpp index 39c8d188c9086..c00a668de95f4 100644 --- a/tests/test-tokenizer-0-llama.cpp +++ b/tests/test-tokenizer-0-llama.cpp @@ -1,12 +1,14 @@ -#include "llama.h" #include "common.h" #include "console.h" +#include "llama.h" #include -#include +#include +#include #include +#include +#include #include -#include // generate using test-tokenizer-0-llama.py static const std::map> & k_tests() { diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp index 386530f23f92c..cc924da5ce56d 100644 --- a/tests/test-tokenizer-1-bpe.cpp +++ b/tests/test-tokenizer-1-bpe.cpp @@ -1,16 +1,14 @@ -#include "llama.h" #include "common.h" -#include "unicode.h" #include "console.h" +#include "ggml.h" +#include "llama.h" +#include "unicode.h" -#include +#include #include -#include +#include #include -#include -#include #include -#include int main(int argc, char **argv) { if (argc < 2) { diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp index 4b58fe4954cf3..95832f899cafc 100644 --- a/tests/test-tokenizer-1-llama.cpp +++ b/tests/test-tokenizer-1-llama.cpp @@ -1,16 +1,13 @@ -#include "llama.h" #include "common.h" -#include "unicode.h" #include "console.h" +#include "ggml.h" +#include "llama.h" +#include "unicode.h" -#include +#include #include -#include #include -#include -#include #include -#include int main(int argc, char **argv) { if (argc < 2) { From a9162dd01feff4fab6881ce327bdce2f0f114cef Mon Sep 17 00:00:00 2001 From: cebtenzzre Date: Thu, 2 Nov 2023 17:54:57 -0400 Subject: [PATCH 3/5] make : remove unneeded deps and add test-rope target --- .gitignore | 25 +++++++++++++------------ Makefile | 23 +++++++++++++---------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 50cbd0b47cae3..9556fec9b7f8f 100644 --- a/.gitignore +++ b/.gitignore @@ -86,15 +86,16 @@ poetry.lock poetry.toml # Test binaries -tests/test-grammar-parser -tests/test-llama-grammar -tests/test-double-float -tests/test-grad0 -tests/test-opt -tests/test-quantize-fns -tests/test-quantize-perf -tests/test-sampling -tests/test-tokenizer-0-llama -tests/test-tokenizer-0-falcon -tests/test-tokenizer-1-llama -tests/test-tokenizer-1-bpe +/tests/test-grammar-parser +/tests/test-llama-grammar +/tests/test-double-float +/tests/test-grad0 +/tests/test-opt +/tests/test-quantize-fns +/tests/test-quantize-perf +/tests/test-sampling +/tests/test-tokenizer-0-llama +/tests/test-tokenizer-0-falcon +/tests/test-tokenizer-1-llama +/tests/test-tokenizer-1-bpe +/tests/test-rope diff --git a/Makefile b/Makefile index 300c1e6c7e127..c8a82a8573d0e 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ BUILD_TARGETS = \ TEST_TARGETS = \ tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \ tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \ - tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe + tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope # Code coverage output files COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report @@ -629,7 +629,7 @@ beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -export-lora: examples/export-lora/export-lora.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) @@ -679,28 +679,28 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS) +tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) +tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-double-float: tests/test-double-float.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-grad0: tests/test-grad0.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-opt: tests/test-opt.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) +tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) @@ -715,5 +715,8 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) +tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + tests/test-c.o: tests/test-c.c llama.h $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ From f30b4e69d1668971dd00bae361f419a7b598b798 Mon Sep 17 00:00:00 2001 From: cebtenzzre Date: Thu, 2 Nov 2023 18:01:13 -0400 Subject: [PATCH 4/5] fix C includes in C++ source files --- ggml-opencl.cpp | 12 +++++------- llama.cpp | 1 - 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp index 202bcb4853893..496f9cdca542d 100644 --- a/ggml-opencl.cpp +++ b/ggml-opencl.cpp @@ -1,20 +1,18 @@ +#include "ggml.h" #include "ggml-opencl.h" #include #include +#include +#include +#include +#include #include #include -#include #define CL_TARGET_OPENCL_VERSION 110 #include -#include -#include -#include - -#include "ggml.h" - #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data #endif diff --git a/llama.cpp b/llama.cpp index 93cdc98247570..a6353fcec6581 100644 --- a/llama.cpp +++ b/llama.cpp @@ -46,7 +46,6 @@ #endif #include #include - #include // for _fseeki64 #endif #include From f595b697980223699c6c881711182bfe8451d4ee Mon Sep 17 00:00:00 2001 From: Jared Van Bortel Date: Thu, 30 Nov 2023 17:11:31 -0500 Subject: [PATCH 5/5] Revert "fix includes with help from include-what-you-use" This reverts commit 635e9fadfd516d4604a0fecf4a854bfb25ad17ae. --- common/common.cpp | 18 +++++-------- common/common.h | 11 ++++---- common/console.cpp | 11 ++++---- common/grammar-parser.cpp | 1 + common/grammar-parser.h | 7 ++--- common/sampling.cpp | 6 ----- common/sampling.h | 8 +++--- common/train.cpp | 14 ++-------- common/train.h | 9 +++---- examples/baby-llama/baby-llama.cpp | 6 ++--- examples/batched-bench/batched-bench.cpp | 5 ++-- examples/batched/batched.cpp | 4 +-- examples/beam-search/beam-search.cpp | 19 +++++++++++-- examples/benchmark/benchmark-matmult.cpp | 15 ++++++++--- .../convert-llama2c-to-ggml.cpp | 19 ++++++------- examples/embedding/embedding.cpp | 5 ---- examples/export-lora/export-lora.cpp | 9 ++----- examples/finetune/finetune.cpp | 20 +++++++------- examples/infill/infill.cpp | 12 +++++---- examples/llama-bench/llama-bench.cpp | 14 ++++------ examples/llava/clip.cpp | 16 +++++------ examples/llava/clip.h | 3 +-- examples/llava/llava.cpp | 7 ++--- examples/main/main.cpp | 11 ++++---- examples/parallel/parallel.cpp | 11 ++------ examples/perplexity/perplexity.cpp | 9 +------ examples/quantize-stats/quantize-stats.cpp | 9 +++---- examples/quantize/quantize.cpp | 6 +---- examples/save-load-state/save-load-state.cpp | 6 ++--- examples/server/server.cpp | 27 +++---------------- examples/simple/simple.cpp | 3 ++- examples/speculative/speculative.cpp | 6 +---- .../train-text-from-scratch.cpp | 21 ++++++++------- ggml-alloc.c | 3 +-- ggml-alloc.h | 3 --- ggml-backend.c | 3 +++ ggml-backend.h | 3 --- ggml-impl.h | 4 +-- ggml-quants.c | 5 ++-- ggml-quants.h | 6 ++--- ggml.c | 10 ++++--- ggml.h | 1 + llama.cpp | 9 ++----- llama.h | 5 +--- pocs/vdot/q8dot.cpp | 19 +++++++------ pocs/vdot/vdot.cpp | 19 +++++++------ tests/test-grad0.cpp | 3 +-- tests/test-grammar-parser.cpp | 8 +----- tests/test-llama-grammar.cpp | 6 ----- tests/test-quantize-fns.cpp | 7 ++--- tests/test-quantize-perf.cpp | 8 +++--- tests/test-rope.cpp | 3 +-- tests/test-sampling.cpp | 8 +++++- tests/test-tokenizer-0-falcon.cpp | 8 +++--- tests/test-tokenizer-0-llama.cpp | 8 +++--- tests/test-tokenizer-1-bpe.cpp | 12 +++++---- tests/test-tokenizer-1-llama.cpp | 11 +++++--- 57 files changed, 222 insertions(+), 298 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 35b22de0feb73..e938dee165d9d 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1,28 +1,20 @@ #include "common.h" -#include "ggml.h" #include "llama.h" -#include "log.h" -#include "sampling.h" #include -#include -#include -#include +#include #include -#include +#include #include -#include #include #include +#include #include #include -#include #include -#include -#include #include -#include #include +#include #if defined(__APPLE__) && defined(__MACH__) #include @@ -40,7 +32,9 @@ #include #include #else +#include #include +#include #endif #if defined(_MSC_VER) diff --git a/common/common.h b/common/common.h index c763be8b06e0a..72a49b8901f26 100644 --- a/common/common.h +++ b/common/common.h @@ -3,18 +3,19 @@ #pragma once #include "llama.h" + #include "sampling.h" #define LOG_NO_FILE_LINE_FUNCTION #include "log.h" #include -#include -#include -#include #include -#include #include +#include +#include +#include +#include #ifdef _WIN32 #define DIRECTORY_SEPARATOR '\\' @@ -67,7 +68,7 @@ struct gpt_params { int32_t yarn_orig_ctx = 0; // YaRN original context length int8_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED; - // sampling parameters + // // sampling parameters struct llama_sampling_params sparams; std::string model = "models/7B/ggml-model-f16.gguf"; // model path diff --git a/common/console.cpp b/common/console.cpp index 69f1419f18ec5..f65cbc6eda0b1 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -14,13 +14,14 @@ #define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004 #endif #else -#include -#include -#include - +#include #include -#include #include +#include +#include +#include +#include +#include #endif #define ANSI_COLOR_RED "\x1b[31m" diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index ef56e909dd232..ff51cc8034c8b 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -1,5 +1,6 @@ #include "grammar-parser.h" #include +#include #include #include #include diff --git a/common/grammar-parser.h b/common/grammar-parser.h index b603764b398bd..9037d72728a42 100644 --- a/common/grammar-parser.h +++ b/common/grammar-parser.h @@ -10,14 +10,11 @@ // space ::= [ \t\n]* #pragma once - #include "llama.h" - -#include -#include +#include #include +#include #include -#include namespace grammar_parser { struct parse_state { diff --git a/common/sampling.cpp b/common/sampling.cpp index a5b684ee31571..1317024c2c11c 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -1,11 +1,5 @@ -#include "common.h" #include "sampling.h" -#include -#include -#include -#include - struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) { struct llama_sampling_context * result = new llama_sampling_context(); diff --git a/common/sampling.h b/common/sampling.h index 03909efbc9c3d..7c9b8dcf23bcb 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -1,12 +1,12 @@ #pragma once -#include "grammar-parser.h" #include "llama.h" -#include +#include "grammar-parser.h" + #include -#include #include +#include // sampling parameters typedef struct llama_sampling_params { @@ -56,6 +56,8 @@ struct llama_sampling_context { std::vector cur; }; +#include "common.h" + // Create a new sampling context instance. struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params); diff --git a/common/train.cpp b/common/train.cpp index d1f5505cb23db..bc15b7a03c0cd 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -1,19 +1,9 @@ -#include "common.h" -#include "ggml.h" -#include "llama.h" #include "train.h" +#include "common.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include struct random_normal_distribution { std::mt19937 gen; diff --git a/common/train.h b/common/train.h index ccac6b7d6bdcc..d86c93cc4f147 100644 --- a/common/train.h +++ b/common/train.h @@ -2,14 +2,13 @@ #pragma once -#include "llama.h" - -#include -#include -#include #include +#include #include +#include "ggml.h" +#include "llama.h" + typedef std::string mt19937_state; struct train_state { diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 35404f9a3aead..8155101d0ab93 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -1,13 +1,11 @@ #include "ggml.h" #include "train.h" -#include +#include #include -#include -#include -#include #include #include +#include #include #if defined(_MSC_VER) diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 955d815cc39bf..533c55c17aad1 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -1,11 +1,10 @@ #include "common.h" -#include "ggml.h" #include "llama.h" #include -#include +#include #include -#include +#include #include // mutates the input string diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp index f918727983c3d..22a4265df77c0 100644 --- a/examples/batched/batched.cpp +++ b/examples/batched/batched.cpp @@ -1,11 +1,9 @@ #include "common.h" -#include "ggml.h" #include "llama.h" #include -#include +#include #include -#include #include #include diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp index cce2b9916ba7f..679b382e19b4e 100644 --- a/examples/beam-search/beam-search.cpp +++ b/examples/beam-search/beam-search.cpp @@ -1,14 +1,29 @@ #include "common.h" #include "llama.h" -#include #include +#include +#include #include +#include +#include +#include #include #include -#include #include +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include +#include +#elif defined (_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define NOMINMAX +#endif +#include +#include +#endif + // Used for debugging to print out beam tokens. struct ostream_beam_view { llama_context * ctx; diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 58bf9a814a579..76e3f57ccce8e 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -1,11 +1,20 @@ #include "common.h" #include "ggml.h" -#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp index 7ecc15cb5791a..cae3bf3c3dc65 100644 --- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp @@ -1,22 +1,19 @@ -#include "common.h" #include "ggml.h" #include "llama.h" +#include "common.h" -#include +#include +#include #include -#include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include #include +#include #include -#include -#include -#include // GGUF keys & tensor names. diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 86f874a526e0a..3295cd2400ac3 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -1,12 +1,7 @@ #include "common.h" #include "llama.h" -#include -#include #include -#include -#include -#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index a2406de5ab28b..d803cfd5cb2d5 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -1,16 +1,11 @@ + #include "common.h" #include "ggml.h" #include "ggml-alloc.h" -#include -#include -#include -#include -#include -#include +#include #include #include -#include static const size_t tensor_alignment = 32; diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index 710ddba96051d..649a3b7c1941e 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -1,19 +1,19 @@ -#include "common.h" -#include "ggml-alloc.h" #include "ggml.h" +#include "ggml-alloc.h" #include "llama.h" +#include "common.h" #include "train.h" - -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include #include +#include +#include +#include #include -#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index f9caffacb7a4f..62f5ce3c16a32 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -1,17 +1,19 @@ #include "common.h" + #include "console.h" #include "llama.h" -#include "sampling.h" +#include "grammar-parser.h" -#include +#include +#include +#include #include -#include +#include #include #include -#include +#include #include #include -#include #include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 0df6c4c45757f..9bd82d565834a 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -1,30 +1,26 @@ -#include "ggml.h" -#include "llama.h" -#include "common.h" -#include "ggml-cuda.h" - #include #include #include -#include #include #include #include #include #include -#include #include #include #include #include -#include #include #include #include #include -#include #include +#include "ggml.h" +#include "llama.h" +#include "common.h" +#include "ggml-cuda.h" + // utils static uint64_t get_time_ns() { using clock = std::chrono::high_resolution_clock; diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 03a8e9c463ef2..61932e659543c 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -2,23 +2,21 @@ // so there might be still unnecessary artifacts hanging around // I'll gradually clean and extend it -#include "clip.h" -#include "ggml-alloc.h" -#include "ggml.h" - -#include -#include +#include #include -#include -#include #include #include #include +#include +#include #include #include -#include #include +#include "clip.h" +#include "ggml.h" +#include "ggml-alloc.h" + #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" diff --git a/examples/llava/clip.h b/examples/llava/clip.h index 106df0d1a6225..3d7261e299a35 100644 --- a/examples/llava/clip.h +++ b/examples/llava/clip.h @@ -1,8 +1,7 @@ #ifndef CLIP_H #define CLIP_H -#include -#include +#include "ggml.h" struct clip_ctx; diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index 2c5a863988f73..f0974d5bcf452 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -1,14 +1,11 @@ #include "clip.h" +#include "llava-utils.h" #include "common.h" -#include "ggml.h" #include "llama.h" -#include "llava-utils.h" -#include #include #include -#include -#include +#include static void show_additional_info(int /*argc*/, char ** argv) { printf("\n example usage: %s -m --mmproj --image [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index ce68efae44af9..8d985c82ac21a 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -1,17 +1,18 @@ #include "common.h" + #include "console.h" #include "llama.h" -#include "sampling.h" -#include +#include +#include +#include #include -#include +#include #include #include -#include +#include #include #include -#include #include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index 9b107e5f1aba3..a78df305f415c 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -2,20 +2,13 @@ // The clients submite requests to the server and they are processed in parallel. #include "common.h" -#include "ggml.h" #include "llama.h" -#include "sampling.h" -#include -#include -#include +#include #include -#include -#include -#include #include -#include #include +#include // trim whitespace from the beginning and end of a string static std::string trim(const std::string & str) { diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index f8e8f8fc5c442..de60c5227f7c1 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -1,20 +1,13 @@ #include "common.h" #include "llama.h" -#include -#include #include #include #include #include -#include -#include -#include #include -#include #include -#include -#include +#include #include #if defined(_MSC_VER) diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index d5c3c35919821..2712824774ae7 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -8,16 +8,15 @@ #include #include #include -#include #include -#include -#include +#include #include #include #include -#include -#include +#include #include +#include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index ce6863a2b3169..d27ea5e9132fd 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -1,14 +1,10 @@ #include "common.h" #include "llama.h" -#include -#include #include -#include #include -#include -#include #include +#include struct quant_option { std::string name; diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 622101449a53a..48d80111010df 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -1,11 +1,9 @@ #include "common.h" #include "llama.h" -#include -#include -#include -#include #include +#include +#include int main(int argc, char ** argv) { gpt_params params; diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 0da14c9b70517..fd755327a511d 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1,7 +1,6 @@ #include "common.h" -#include "ggml.h" #include "llama.h" -#include "sampling.h" +#include "grammar-parser.h" #include "../llava/clip.h" @@ -21,28 +20,10 @@ #include "completion.js.hpp" #include "json-schema-to-grammar.mjs.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include +#include +#include #ifndef SERVER_VERBOSE #define SERVER_VERBOSE 1 diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index 52b0de48f5e7f..374aef6f16189 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -1,8 +1,9 @@ #include "common.h" -#include "ggml.h" #include "llama.h" +#include #include +#include #include int main(int argc, char ** argv) { diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 3e65d7e6e2872..798684f66678e 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -1,13 +1,9 @@ #include "common.h" -#include "ggml.h" #include "llama.h" -#include "sampling.h" -#include +#include #include -#include #include -#include #include #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 100 diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 42e5c6b0fbc62..2a257e63215e3 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -1,18 +1,19 @@ -#include "common.h" -#include "ggml-alloc.h" #include "ggml.h" -#include "llama.h" +#include "ggml-alloc.h" +#include "common.h" #include "train.h" - -#include -#include -#include -#include -#include +#include "llama.h" +#include +#include +#include +#include #include +#include #include +#include +#include +#include #include -#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/ggml-alloc.c b/ggml-alloc.c index c9fd6e54d91b1..34eba3f830e84 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -1,9 +1,8 @@ #include "ggml-alloc.h" #include "ggml-backend.h" #include "ggml.h" - #include -#include +#include #include #include #include diff --git a/ggml-alloc.h b/ggml-alloc.h index 4ebba6a6b3d57..e38758878b91a 100644 --- a/ggml-alloc.h +++ b/ggml-alloc.h @@ -2,9 +2,6 @@ #include "ggml.h" -#include -#include - #ifdef __cplusplus extern "C" { #endif diff --git a/ggml-backend.c b/ggml-backend.c index 89af304d3cb07..ca8d83dafe47c 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -1,5 +1,8 @@ #include "ggml-backend.h" +#include "ggml-alloc.h" +#include +#include #include #include #include diff --git a/ggml-backend.h b/ggml-backend.h index 12618036bdb10..da134b0dbed51 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -2,9 +2,6 @@ #include "ggml.h" -#include -#include - #ifdef __cplusplus extern "C" { #endif diff --git a/ggml-impl.h b/ggml-impl.h index 8a9fb73884c82..5ec18a50c8da5 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -1,9 +1,9 @@ #pragma once -// GGML internal header - #include "ggml.h" +// GGML internal header + #include #include #include diff --git a/ggml-quants.c b/ggml-quants.c index 39f2c27b3c3c6..740be6dc5c798 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1,11 +1,10 @@ #include "ggml-quants.h" #include "ggml-impl.h" -#include -#include #include -#include #include +#include +#include #ifdef __ARM_NEON diff --git a/ggml-quants.h b/ggml-quants.h index f782d54c8e6d9..70c12c27465e8 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -1,11 +1,11 @@ #pragma once -// GGML internal header +#include "ggml-impl.h" -#include "ggml.h" +// GGML internal header -#include #include +#include #define QK4_0 32 typedef struct { diff --git a/ggml.c b/ggml.c index fca45ad14d5d3..605a27940fc81 100644 --- a/ggml.c +++ b/ggml.c @@ -1,7 +1,6 @@ #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows #define _USE_MATH_DEFINES // For M_PI on MSVC -#include "ggml.h" #include "ggml-impl.h" #include "ggml-quants.h" @@ -21,7 +20,9 @@ #include #include #include +#include #include +#include #ifdef GGML_USE_METAL #include @@ -84,14 +85,15 @@ static int sched_yield (void) { return 0; } #else - #include -#include #include -#include typedef void * thread_ret_t; +#include +#include +#include + #endif #ifdef GGML_USE_CPU_HBM diff --git a/ggml.h b/ggml.h index 5b27b7ad207fd..70eb25a6bf3af 100644 --- a/ggml.h +++ b/ggml.h @@ -300,6 +300,7 @@ extern "C" { GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n); GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n); + struct ggml_object; struct ggml_context; enum ggml_type { diff --git a/llama.cpp b/llama.cpp index a6353fcec6581..518aa5b98dc39 100644 --- a/llama.cpp +++ b/llama.cpp @@ -51,34 +51,29 @@ #include #include #include -#include #include #include #include #include +#include #include #include -#include #include #include -#include #include #include #include #include -#include -#include #include #include #include #include #include #include +#include #include #include -#include #include -#include #include #if defined(_MSC_VER) diff --git a/llama.h b/llama.h index dd16407c81d2c..3f1becd761688 100644 --- a/llama.h +++ b/llama.h @@ -2,14 +2,12 @@ #define LLAMA_H #include "ggml.h" - #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES #else #define LLAMA_MAX_DEVICES 1 #endif // GGML_USE_CUBLAS - #include #include #include @@ -761,9 +759,8 @@ extern "C" { // Internal API to be implemented by llama.cpp and used by tests/benchmarks only #ifdef LLAMA_API_INTERNAL -#include -#include #include +#include struct ggml_tensor; diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp index 05e85ee300f1c..111770d5519cb 100644 --- a/pocs/vdot/q8dot.cpp +++ b/pocs/vdot/q8dot.cpp @@ -1,13 +1,16 @@ -#include "ggml.h" - -#include -#include -#include -#include #include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include constexpr int kVecSize = 1 << 16; diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp index 5b0ebb7067e08..e96372c4b7107 100644 --- a/pocs/vdot/vdot.cpp +++ b/pocs/vdot/vdot.cpp @@ -1,15 +1,14 @@ -#include "ggml.h" - -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include + +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp index 49a92ed23481b..0a559b27ab370 100644 --- a/tests/test-grad0.cpp +++ b/tests/test-grad0.cpp @@ -1,11 +1,10 @@ #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows #include "ggml.h" -#include #include -#include #include #include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index b4d825b6271e2..a0b5b043df868 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -2,16 +2,10 @@ #undef NDEBUG #endif -#include "grammar-parser.h" #include "llama.h" +#include "grammar-parser.h" #include -#include -#include -#include -#include -#include -#include int main() { diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index cbbe95bd3e6a1..73dd33dd286a5 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -6,12 +6,6 @@ #include "grammar-parser.h" #include -#include -#include -#include -#include -#include -#include int main() { diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 3b88b49890bd4..a2459a2867c5c 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -2,9 +2,10 @@ #include "ggml.h" -#include -#include -#include +#undef NDEBUG +#include +#include +#include #include #include diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp index b4260e54882ea..88fac0e23106b 100644 --- a/tests/test-quantize-perf.cpp +++ b/tests/test-quantize-perf.cpp @@ -2,12 +2,14 @@ #include "ggml.h" +#undef NDEBUG #include -#include -#include -#include +#include #include +#include +#include #include +#include #include #include diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp index 2fda30ccba531..26c1f42dc0e95 100644 --- a/tests/test-rope.cpp +++ b/tests/test-rope.cpp @@ -1,10 +1,9 @@ #include "ggml.h" -#include #include -#include #include #include +#include #include #if defined(_MSC_VER) diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index 63ae06d309452..32e58941c0ee0 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -1,9 +1,15 @@ #include "ggml.h" #include "llama.h" +#ifdef NDEBUG +#undef NDEBUG +#endif + #include -#include +#include +#include #include +#include static void dump(const llama_token_data_array * candidates) { for (size_t i = 0; i < candidates->size; i++) { diff --git a/tests/test-tokenizer-0-falcon.cpp b/tests/test-tokenizer-0-falcon.cpp index d2f983baa5402..a4e9d2b912728 100644 --- a/tests/test-tokenizer-0-falcon.cpp +++ b/tests/test-tokenizer-0-falcon.cpp @@ -1,14 +1,12 @@ +#include "llama.h" #include "common.h" #include "console.h" -#include "llama.h" #include -#include -#include -#include #include -#include +#include #include +#include // generate using test-tokenizer-0-falcon.py static const std::map> & k_tests() { diff --git a/tests/test-tokenizer-0-llama.cpp b/tests/test-tokenizer-0-llama.cpp index c00a668de95f4..39c8d188c9086 100644 --- a/tests/test-tokenizer-0-llama.cpp +++ b/tests/test-tokenizer-0-llama.cpp @@ -1,14 +1,12 @@ +#include "llama.h" #include "common.h" #include "console.h" -#include "llama.h" #include -#include -#include -#include #include -#include +#include #include +#include // generate using test-tokenizer-0-llama.py static const std::map> & k_tests() { diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp index cc924da5ce56d..386530f23f92c 100644 --- a/tests/test-tokenizer-1-bpe.cpp +++ b/tests/test-tokenizer-1-bpe.cpp @@ -1,14 +1,16 @@ -#include "common.h" -#include "console.h" -#include "ggml.h" #include "llama.h" +#include "common.h" #include "unicode.h" +#include "console.h" -#include +#include #include -#include +#include #include +#include +#include #include +#include int main(int argc, char **argv) { if (argc < 2) { diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp index 95832f899cafc..4b58fe4954cf3 100644 --- a/tests/test-tokenizer-1-llama.cpp +++ b/tests/test-tokenizer-1-llama.cpp @@ -1,13 +1,16 @@ -#include "common.h" -#include "console.h" -#include "ggml.h" #include "llama.h" +#include "common.h" #include "unicode.h" +#include "console.h" -#include +#include #include +#include #include +#include +#include #include +#include int main(int argc, char **argv) { if (argc < 2) {