From 2b5136e1c2e4a8730ba88193f0e6f76360748cb2 Mon Sep 17 00:00:00 2001
From: cebtenzzre <cebtenzzre@gmail.com>
Date: Thu, 2 Nov 2023 12:42:36 -0400
Subject: [PATCH 1/5] cmake : fix joining of REAL_GIT_DIR

---
 common/CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index ac594b2ca84ea..09575e6a1a477 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -11,7 +11,12 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
     if(NOT IS_DIRECTORY "${GIT_DIR}")
         file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
         string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
-        set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
+        # gitdir may be absolute (e.g. linked worktrees); IS_ABSOLUTE also recognises Windows drive-letter paths.
+        if (IS_ABSOLUTE "${REAL_GIT_DIR}")
+            set(GIT_DIR "${REAL_GIT_DIR}")
+        else()
+            set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
+        endif()
     endif()
 
     set(GIT_INDEX "${GIT_DIR}/index")

From 635e9fadfd516d4604a0fecf4a854bfb25ad17ae Mon Sep 17 00:00:00 2001
From: cebtenzzre <cebtenzzre@gmail.com>
Date: Wed, 1 Nov 2023 13:09:21 -0400
Subject: [PATCH 2/5] fix includes with help from include-what-you-use

---
 common/common.cpp                             | 18 ++++++++-----
 common/common.h                               | 11 ++++----
 common/console.cpp                            | 11 ++++----
 common/grammar-parser.cpp                     |  1 -
 common/grammar-parser.h                       |  7 +++--
 common/sampling.cpp                           |  6 +++++
 common/sampling.h                             |  8 +++---
 common/train.cpp                              | 14 ++++++++--
 common/train.h                                |  9 ++++---
 examples/baby-llama/baby-llama.cpp            |  6 +++--
 examples/batched-bench/batched-bench.cpp      |  5 ++--
 examples/batched/batched.cpp                  |  4 ++-
 examples/beam-search/beam-search.cpp          | 19 ++-----------
 examples/benchmark/benchmark-matmult.cpp      | 15 +++--------
 .../convert-llama2c-to-ggml.cpp               | 19 +++++++------
 examples/embedding/embedding.cpp              |  5 ++++
 examples/export-lora/export-lora.cpp          |  9 +++++--
 examples/finetune/finetune.cpp                | 20 +++++++-------
 examples/infill/infill.cpp                    | 12 ++++-----
 examples/llama-bench/llama-bench.cpp          | 14 ++++++----
 examples/llava/clip.cpp                       | 16 ++++++-----
 examples/llava/clip.h                         |  3 ++-
 examples/llava/llava.cpp                      |  7 +++--
 examples/main/main.cpp                        | 11 ++++----
 examples/parallel/parallel.cpp                | 11 ++++++--
 examples/perplexity/perplexity.cpp            |  9 ++++++-
 examples/quantize-stats/quantize-stats.cpp    |  9 ++++---
 examples/quantize/quantize.cpp                |  6 ++++-
 examples/save-load-state/save-load-state.cpp  |  6 +++--
 examples/server/server.cpp                    | 27 ++++++++++++++++---
 examples/simple/simple.cpp                    |  3 +--
 examples/speculative/speculative.cpp          |  6 ++++-
 .../train-text-from-scratch.cpp               | 21 +++++++--------
 ggml-alloc.c                                  |  3 ++-
 ggml-alloc.h                                  |  3 +++
 ggml-backend.c                                |  3 ---
 ggml-backend.h                                |  3 +++
 ggml-impl.h                                   |  4 +--
 ggml-quants.c                                 |  5 ++--
 ggml-quants.h                                 |  6 ++---
 ggml.c                                        | 10 +++----
 ggml.h                                        |  1 -
 llama.cpp                                     |  9 +++++--
 llama.h                                       |  5 +++-
 pocs/vdot/q8dot.cpp                           | 19 ++++++-------
 pocs/vdot/vdot.cpp                            | 19 ++++++-------
 tests/test-grad0.cpp                          |  3 ++-
 tests/test-grammar-parser.cpp                 |  8 +++++-
 tests/test-llama-grammar.cpp                  |  6 +++++
 tests/test-quantize-fns.cpp                   |  7 +++--
 tests/test-quantize-perf.cpp                  |  8 +++---
 tests/test-rope.cpp                           |  3 ++-
 tests/test-sampling.cpp                       |  8 +-----
 tests/test-tokenizer-0-falcon.cpp             |  8 +++---
 tests/test-tokenizer-0-llama.cpp              |  8 +++---
 tests/test-tokenizer-1-bpe.cpp                | 12 ++++-----
 tests/test-tokenizer-1-llama.cpp              | 11 +++-----
 57 files changed, 298 insertions(+), 222 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index e938dee165d9d..35b22de0feb73 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1,20 +1,28 @@
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
+#include "log.h"
+#include "sampling.h"
 
 #include <algorithm>
-#include <cassert>
+#include <cctype>
+#include <chrono>
+#include <cinttypes>
 #include <cmath>
-#include <cstring>
+#include <cstdlib>
 #include <ctime>
+#include <exception>
 #include <fstream>
 #include <iterator>
-#include <iostream>
 #include <regex>
 #include <sstream>
+#include <stdexcept>
 #include <string>
+#include <thread>
+#include <unordered_map>
 #include <unordered_set>
+#include <utility>
 #include <vector>
-#include <cinttypes>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
@@ -32,9 +40,7 @@
 #include <fcntl.h>
 #include <io.h>
 #else
-#include <sys/ioctl.h>
 #include <sys/stat.h>
-#include <unistd.h>
 #endif
 
 #if defined(_MSC_VER)
diff --git a/common/common.h b/common/common.h
index 72a49b8901f26..c763be8b06e0a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -3,19 +3,18 @@
 #pragma once
 
 #include "llama.h"
-
 #include "sampling.h"
 
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"
 
 #include <cmath>
-#include <string>
-#include <vector>
+#include <cstdint>
+#include <cstdio>
 #include <random>
-#include <thread>
-#include <unordered_map>
+#include <string>
 #include <tuple>
+#include <vector>
 
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -68,7 +67,7 @@ struct gpt_params {
     int32_t yarn_orig_ctx                   = 0;    // YaRN original context length
     int8_t  rope_scaling_type               = LLAMA_ROPE_SCALING_UNSPECIFIED;
 
-    // // sampling parameters
+    // sampling parameters
     struct llama_sampling_params sparams;
 
     std::string model             = "models/7B/ggml-model-f16.gguf"; // model path
diff --git a/common/console.cpp b/common/console.cpp
index f65cbc6eda0b1..69f1419f18ec5 100644
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -14,14 +14,13 @@
 #define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
 #endif
 #else
-#include <climits>
+#include <clocale>
+#include <cstdio>
+#include <cwchar>
+
 #include <sys/ioctl.h>
-#include <unistd.h>
-#include <wchar.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <signal.h>
 #include <termios.h>
+#include <unistd.h>
 #endif
 
 #define ANSI_COLOR_RED     "\x1b[31m"
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp
index ff51cc8034c8b..ef56e909dd232 100644
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -1,6 +1,5 @@
 #include "grammar-parser.h"
 #include <cstdint>
-#include <cwchar>
 #include <string>
 #include <utility>
 #include <stdexcept>
diff --git a/common/grammar-parser.h b/common/grammar-parser.h
index 9037d72728a42..b603764b398bd 100644
--- a/common/grammar-parser.h
+++ b/common/grammar-parser.h
@@ -10,11 +10,14 @@
 // space ::= [ \t\n]*
 
 #pragma once
+
 #include "llama.h"
-#include <vector>
-#include <map>
+
 #include <cstdint>
+#include <cstdio>
+#include <map>
 #include <string>
+#include <vector>
 
 namespace grammar_parser {
     struct parse_state {
diff --git a/common/sampling.cpp b/common/sampling.cpp
index 1317024c2c11c..a5b684ee31571 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -1,5 +1,11 @@
+#include "common.h"
 #include "sampling.h"
 
+#include <algorithm>
+#include <cstdio>
+#include <map>
+#include <utility>
+
 struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
     struct llama_sampling_context * result = new llama_sampling_context();
 
diff --git a/common/sampling.h b/common/sampling.h
index 7c9b8dcf23bcb..03909efbc9c3d 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -1,12 +1,12 @@
 #pragma once
 
-#include "llama.h"
-
 #include "grammar-parser.h"
+#include "llama.h"
 
+#include <cstdint>
 #include <string>
-#include <vector>
 #include <unordered_map>
+#include <vector>
 
 // sampling parameters
 typedef struct llama_sampling_params {
@@ -56,8 +56,6 @@ struct llama_sampling_context {
     std::vector<llama_token_data> cur;
 };
 
-#include "common.h"
-
 // Create a new sampling context instance.
 struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params);
 
diff --git a/common/train.cpp b/common/train.cpp
index bc15b7a03c0cd..d1f5505cb23db 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -1,9 +1,19 @@
-#include "train.h"
 #include "common.h"
+#include "ggml.h"
+#include "llama.h"
+#include "train.h"
 
+#include <algorithm>
+#include <cerrno>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <locale>
 #include <random>
 #include <sstream>
-#include <functional>
 
 struct random_normal_distribution {
     std::mt19937 gen;
diff --git a/common/train.h b/common/train.h
index d86c93cc4f147..ccac6b7d6bdcc 100644
--- a/common/train.h
+++ b/common/train.h
@@ -2,13 +2,14 @@
 
 #pragma once
 
-#include <string>
+#include "llama.h"
+
+#include <cstddef>
+#include <cstdint>
 #include <random>
+#include <string>
 #include <vector>
 
-#include "ggml.h"
-#include "llama.h"
-
 typedef std::string mt19937_state;
 
 struct train_state {
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 8155101d0ab93..35404f9a3aead 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1,11 +1,13 @@
 #include "ggml.h"
 #include "train.h"
 
-#include <vector>
+#include <algorithm>
 #include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <random>
 #include <vector>
 
 #if defined(_MSC_VER)
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 533c55c17aad1..955d815cc39bf 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -1,10 +1,11 @@
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
 
 #include <algorithm>
-#include <cmath>
+#include <cstdint>
 #include <cstdio>
-#include <string>
+#include <cstdlib>
 #include <vector>
 
 // mutates the input string
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 22a4265df77c0..f918727983c3d 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -1,9 +1,11 @@
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
 
 #include <algorithm>
-#include <cmath>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
 #include <string>
 #include <vector>
 
diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp
index 679b382e19b4e..cce2b9916ba7f 100644
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@@ -1,29 +1,14 @@
 #include "common.h"
 #include "llama.h"
 
+#include <algorithm>
 #include <cassert>
-#include <cinttypes>
-#include <cmath>
 #include <cstdio>
-#include <cstring>
-#include <ctime>
-#include <fstream>
 #include <iostream>
 #include <string>
+#include <tuple>
 #include <vector>
 
-#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
-#include <signal.h>
-#include <unistd.h>
-#elif defined (_WIN32)
-#define WIN32_LEAN_AND_MEAN
-#ifndef NOMINMAX
-#   define NOMINMAX
-#endif
-#include <windows.h>
-#include <signal.h>
-#endif
-
 // Used for debugging to print out beam tokens.
 struct ostream_beam_view {
     llama_context * ctx;
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 76e3f57ccce8e..58bf9a814a579 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -1,20 +1,11 @@
 #include "common.h"
 #include "ggml.h"
 
-#include <locale.h>
-#include <assert.h>
-#include <math.h>
-#include <cstring>
-#include <cstdio>
 #include <cinttypes>
-#include <unordered_map>
-#include <queue>
-#include <string.h>
-#include <cassert>
-#include <fstream>
+#include <cstdio>
+#include <cstdlib>
 #include <string>
-#include <iterator>
-#include <algorithm>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index cae3bf3c3dc65..7ecc15cb5791a 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,19 +1,22 @@
+#include "common.h"
 #include "ggml.h"
 #include "llama.h"
-#include "common.h"
 
-#include <unordered_map>
-#include <vector>
+#include <algorithm>
 #include <cassert>
+#include <cerrno>
 #include <climits>
-#include <cstring>
+#include <cmath>
 #include <cstdarg>
-#include <ctime>
-#include <random>
-#include <stdexcept>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 #include <sstream>
-#include <algorithm>
 #include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
 
 // GGUF keys & tensor names.
 
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 3295cd2400ac3..86f874a526e0a 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -1,7 +1,12 @@
 #include "common.h"
 #include "llama.h"
 
+#include <algorithm>
+#include <cstdio>
 #include <ctime>
+#include <random>
+#include <tuple>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index d803cfd5cb2d5..a2406de5ab28b 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -1,11 +1,16 @@
-
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
 
-#include <vector>
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 #include <string>
 #include <thread>
+#include <vector>
 
 static const size_t tensor_alignment = 32;
 
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 649a3b7c1941e..710ddba96051d 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -1,19 +1,19 @@
-#include "ggml.h"
+#include "common.h"
 #include "ggml-alloc.h"
+#include "ggml.h"
 #include "llama.h"
-#include "common.h"
 #include "train.h"
-#include <unordered_map>
-#include <vector>
-#include <cassert>
-#include <climits>
+
+#include <algorithm>
+#include <cerrno>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
 #include <cstring>
-#include <cstdarg>
 #include <ctime>
-#include <random>
-#include <stdexcept>
-#include <algorithm>
 #include <string>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index 62f5ce3c16a32..f9caffacb7a4f 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -1,19 +1,17 @@
 #include "common.h"
-
 #include "console.h"
 #include "llama.h"
-#include "grammar-parser.h"
+#include "sampling.h"
 
-#include <cassert>
-#include <cinttypes>
-#include <cmath>
+#include <algorithm>
 #include <cstdio>
-#include <cstring>
+#include <cstdlib>
 #include <ctime>
 #include <fstream>
-#include <iostream>
+#include <random>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 9bd82d565834a..0df6c4c45757f 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1,26 +1,30 @@
+#include "ggml.h"
+#include "llama.h"
+#include "common.h"
+#include "ggml-cuda.h"
+
 #include <algorithm>
 #include <array>
 #include <cassert>
+#include <cctype>
 #include <chrono>
 #include <cinttypes>
 #include <clocale>
 #include <cmath>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
 #include <ctime>
 #include <iterator>
 #include <map>
+#include <memory>
 #include <numeric>
 #include <regex>
 #include <sstream>
 #include <string>
+#include <utility>
 #include <vector>
 
-#include "ggml.h"
-#include "llama.h"
-#include "common.h"
-#include "ggml-cuda.h"
-
 // utils
 static uint64_t get_time_ns() {
     using clock = std::chrono::high_resolution_clock;
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 61932e659543c..03a8e9c463ef2 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2,21 +2,23 @@
 // so there might be still unnecessary artifacts hanging around
 // I'll gradually clean and extend it
 
-#include <cassert>
+#include "clip.h"
+#include "ggml-alloc.h"
+#include "ggml.h"
+
+#include <algorithm>
+#include <climits>
 #include <cmath>
+#include <cstdarg>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <fstream>
-#include <iostream>
-#include <map>
 #include <regex>
 #include <stdexcept>
+#include <string>
 #include <vector>
 
-#include "clip.h"
-#include "ggml.h"
-#include "ggml-alloc.h"
-
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
index 3d7261e299a35..106df0d1a6225 100644
--- a/examples/llava/clip.h
+++ b/examples/llava/clip.h
@@ -1,7 +1,8 @@
 #ifndef CLIP_H
 #define CLIP_H
 
-#include "ggml.h"
+#include <stddef.h>
+#include <stdint.h>
 
 struct clip_ctx;
 
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index f0974d5bcf452..2c5a863988f73 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -1,11 +1,14 @@
 #include "clip.h"
-#include "llava-utils.h"
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
+#include "llava-utils.h"
 
+#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
-#include <vector>
+#include <cstring>
+#include <string>
 
 static void show_additional_info(int /*argc*/, char ** argv) {
     printf("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 8d985c82ac21a..ce68efae44af9 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -1,18 +1,17 @@
 #include "common.h"
-
 #include "console.h"
 #include "llama.h"
+#include "sampling.h"
 
-#include <cassert>
-#include <cinttypes>
-#include <cmath>
+#include <algorithm>
 #include <cstdio>
-#include <cstring>
+#include <cstdlib>
 #include <ctime>
 #include <fstream>
-#include <iostream>
+#include <random>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index a78df305f415c..9b107e5f1aba3 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -2,13 +2,20 @@
 // The clients submite requests to the server and they are processed in parallel.
 
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
+#include "sampling.h"
 
-#include <cmath>
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
+#include <ctime>
+#include <istream>
 #include <string>
+#include <tuple>
 #include <vector>
-#include <ctime>
 
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index de60c5227f7c1..f8e8f8fc5c442 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -1,13 +1,20 @@
 #include "common.h"
 #include "llama.h"
 
+#include <algorithm>
+#include <chrono>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
+#include <iterator>
+#include <mutex>
+#include <random>
 #include <sstream>
+#include <string>
 #include <thread>
-#include <mutex>
+#include <tuple>
+#include <utility>
 #include <vector>
 
 #if defined(_MSC_VER)
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 2712824774ae7..d5c3c35919821 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -8,15 +8,16 @@
 #include <cinttypes>
 #include <cmath>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
-#include <map>
+#include <iterator>
+#include <mutex>
 #include <numeric>
 #include <regex>
 #include <string>
-#include <unordered_map>
-#include <vector>
 #include <thread>
-#include <mutex>
+#include <utility>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index d27ea5e9132fd..ce6863a2b3169 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -1,10 +1,14 @@
 #include "common.h"
 #include "llama.h"
 
+#include <cctype>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
-#include <vector>
+#include <exception>
 #include <string>
+#include <vector>
 
 struct quant_option {
     std::string name;
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 48d80111010df..622101449a53a 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -1,9 +1,11 @@
 #include "common.h"
 #include "llama.h"
 
-#include <vector>
+#include <cstdint>
 #include <cstdio>
-#include <chrono>
+#include <string>
+#include <tuple>
+#include <vector>
 
 int main(int argc, char ** argv) {
     gpt_params params;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index fd755327a511d..0da14c9b70517 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1,6 +1,7 @@
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
-#include "grammar-parser.h"
+#include "sampling.h"
 
 #include "../llava/clip.h"
 
@@ -20,10 +21,28 @@
 #include "completion.js.hpp"
 #include "json-schema-to-grammar.mjs.hpp"
 
-#include <cstddef>
-#include <thread>
-#include <mutex>
+#include <algorithm>
+#include <cctype>
 #include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <exception>
+#include <functional>
+#include <istream>
+#include <iterator>
+#include <mutex>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
 
 #ifndef SERVER_VERBOSE
 #define SERVER_VERBOSE 1
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 374aef6f16189..52b0de48f5e7f 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -1,9 +1,8 @@
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
 
-#include <cmath>
 #include <cstdio>
-#include <string>
 #include <vector>
 
 int main(int argc, char ** argv) {
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 798684f66678e..3e65d7e6e2872 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -1,9 +1,13 @@
 #include "common.h"
+#include "ggml.h"
 #include "llama.h"
+#include "sampling.h"
 
-#include <cmath>
+#include <algorithm>
 #include <cstdio>
+#include <cstring>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE  100
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 2a257e63215e3..42e5c6b0fbc62 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1,19 +1,18 @@
-#include "ggml.h"
-#include "ggml-alloc.h"
 #include "common.h"
-#include "train.h"
+#include "ggml-alloc.h"
+#include "ggml.h"
 #include "llama.h"
-#include <unordered_map>
-#include <vector>
-#include <cassert>
-#include <climits>
+#include "train.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
 #include <cstring>
-#include <cstdarg>
 #include <ctime>
-#include <random>
-#include <stdexcept>
-#include <algorithm>
 #include <string>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/ggml-alloc.c b/ggml-alloc.c
index 34eba3f830e84..c9fd6e54d91b1 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -1,8 +1,9 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "ggml.h"
+
 #include <assert.h>
-#include <stdarg.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/ggml-alloc.h b/ggml-alloc.h
index e38758878b91a..4ebba6a6b3d57 100644
--- a/ggml-alloc.h
+++ b/ggml-alloc.h
@@ -2,6 +2,9 @@
 
 #include "ggml.h"
 
+#include <stdbool.h>
+#include <stddef.h>
+
 #ifdef  __cplusplus
 extern "C" {
 #endif
diff --git a/ggml-backend.c b/ggml-backend.c
index ca8d83dafe47c..89af304d3cb07 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1,8 +1,5 @@
 #include "ggml-backend.h"
-#include "ggml-alloc.h"
 
-#include <assert.h>
-#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/ggml-backend.h b/ggml-backend.h
index da134b0dbed51..12618036bdb10 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -2,6 +2,9 @@
 
 #include "ggml.h"
 
+#include <stdbool.h>
+#include <stddef.h>
+
 #ifdef  __cplusplus
 extern "C" {
 #endif
diff --git a/ggml-impl.h b/ggml-impl.h
index 5ec18a50c8da5..8a9fb73884c82 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -1,9 +1,9 @@
 #pragma once
 
-#include "ggml.h"
-
 // GGML internal header
 
+#include "ggml.h"
+
 #include <assert.h>
 #include <stddef.h>
 #include <stdbool.h>
diff --git a/ggml-quants.c b/ggml-quants.c
index 740be6dc5c798..39f2c27b3c3c6 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1,10 +1,11 @@
 #include "ggml-quants.h"
 #include "ggml-impl.h"
 
-#include <math.h>
-#include <string.h>
 #include <assert.h>
 #include <float.h>
+#include <math.h>
+#include <stdbool.h>
+#include <string.h>
 
 #ifdef __ARM_NEON
 
diff --git a/ggml-quants.h b/ggml-quants.h
index 70c12c27465e8..f782d54c8e6d9 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -1,11 +1,11 @@
 #pragma once
 
-#include "ggml-impl.h"
-
 // GGML internal header
 
+#include "ggml.h"
+
+#include <assert.h>
 #include <stdint.h>
-#include <stddef.h>
 
 #define QK4_0 32
 typedef struct {
diff --git a/ggml.c b/ggml.c
index 605a27940fc81..fca45ad14d5d3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1,6 +1,7 @@
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
 
+#include "ggml.h"
 #include "ggml-impl.h"
 #include "ggml-quants.h"
 
@@ -20,9 +21,7 @@
 #include <inttypes.h>
 #include <stdio.h>
 #include <float.h>
-#include <limits.h>
 #include <stdarg.h>
-#include <signal.h>
 
 #ifdef GGML_USE_METAL
 #include <unistd.h>
@@ -85,15 +84,14 @@ static int sched_yield (void) {
     return 0;
 }
 #else
+
 #include <pthread.h>
+#include <sched.h>
 #include <stdatomic.h>
+#include <sys/stat.h>
 
 typedef void * thread_ret_t;
 
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
 #endif
 
 #ifdef GGML_USE_CPU_HBM
diff --git a/ggml.h b/ggml.h
index 70eb25a6bf3af..5b27b7ad207fd 100644
--- a/ggml.h
+++ b/ggml.h
@@ -300,7 +300,6 @@ extern "C" {
     GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
     GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
 
-    struct ggml_object;
     struct ggml_context;
 
     enum ggml_type {
diff --git a/llama.cpp b/llama.cpp
index bb60044b4707f..93cdc98247570 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -52,29 +52,34 @@
 #include <algorithm>
 #include <array>
 #include <cassert>
+#include <cerrno>
 #include <cinttypes>
 #include <climits>
 #include <cmath>
 #include <cstdarg>
-#include <cstddef>
 #include <cstdint>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
 #include <ctime>
+#include <exception>
 #include <forward_list>
 #include <fstream>
 #include <functional>
 #include <initializer_list>
+#include <iterator>
+#include <limits>
 #include <map>
 #include <memory>
 #include <mutex>
 #include <numeric>
 #include <queue>
 #include <random>
-#include <regex>
 #include <set>
 #include <sstream>
+#include <stdexcept>
 #include <thread>
+#include <type_traits>
 #include <unordered_map>
 
 #if defined(_MSC_VER)
diff --git a/llama.h b/llama.h
index 3f1becd761688..dd16407c81d2c 100644
--- a/llama.h
+++ b/llama.h
@@ -2,12 +2,14 @@
 #define LLAMA_H
 
 #include "ggml.h"
+
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
 #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
 #else
 #define LLAMA_MAX_DEVICES 1
 #endif // GGML_USE_CUBLAS
+
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -759,8 +761,9 @@ extern "C" {
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL
 
-#include <vector>
 #include <string>
+#include <utility>
+#include <vector>
 
 struct ggml_tensor;
 
diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp
index 111770d5519cb..05e85ee300f1c 100644
--- a/pocs/vdot/q8dot.cpp
+++ b/pocs/vdot/q8dot.cpp
@@ -1,16 +1,13 @@
-#include <cstdio>
-#include <type_traits>
-#include <vector>
-#include <random>
+#include "ggml.h"
+
+#include <algorithm>
 #include <chrono>
-#include <cstdlib>
 #include <cmath>
-#include <cassert>
-#include <cstring>
-#include <array>
-#include <type_traits>
-
-#include <ggml.h>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+#include <vector>
 
 constexpr int kVecSize = 1 << 16;
 
diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp
index e96372c4b7107..5b0ebb7067e08 100644
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -1,14 +1,15 @@
-#include <cstdio>
-#include <vector>
-#include <random>
+#include "ggml.h"
+
+#include <algorithm>
+#include <cassert>
 #include <chrono>
-#include <cstdlib>
 #include <cmath>
-#include <cassert>
-#include <cstring>
-#include <array>
-
-#include <ggml.h>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+#include <utility>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 0a559b27ab370..49a92ed23481b 100644
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -1,10 +1,11 @@
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #include "ggml.h"
 
+#include <cassert>
 #include <cmath>
+#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
-#include <cassert>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp
index a0b5b043df868..b4d825b6271e2 100644
--- a/tests/test-grammar-parser.cpp
+++ b/tests/test-grammar-parser.cpp
@@ -2,10 +2,16 @@
 #undef NDEBUG
 #endif
 
-#include "llama.h"
 #include "grammar-parser.h"
+#include "llama.h"
 
 #include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
 
 int main()
 {
diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp
index 73dd33dd286a5..cbbe95bd3e6a1 100644
--- a/tests/test-llama-grammar.cpp
+++ b/tests/test-llama-grammar.cpp
@@ -6,6 +6,12 @@
 #include "grammar-parser.h"
 
 #include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
 
 int main()
 {
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index a2459a2867c5c..3b88b49890bd4 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -2,10 +2,9 @@
 
 #include "ggml.h"
 
-#undef NDEBUG
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
 #include <string>
 #include <vector>
 
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index 88fac0e23106b..b4260e54882ea 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -2,14 +2,12 @@
 
 #include "ggml.h"
 
-#undef NDEBUG
 #include <algorithm>
-#include <assert.h>
+#include <cinttypes>
+#include <cmath>
+#include <cstdio>
 #include <functional>
-#include <inttypes.h>
-#include <math.h>
 #include <memory>
-#include <stdio.h>
 #include <string>
 #include <vector>
 
diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp
index 26c1f42dc0e95..2fda30ccba531 100644
--- a/tests/test-rope.cpp
+++ b/tests/test-rope.cpp
@@ -1,9 +1,10 @@
 #include "ggml.h"
 
+#include <cassert>
 #include <cmath>
+#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
-#include <cassert>
 #include <vector>
 
 #if defined(_MSC_VER)
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 32e58941c0ee0..63ae06d309452 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -1,15 +1,9 @@
 #include "ggml.h"
 #include "llama.h"
 
-#ifdef NDEBUG
-#undef NDEBUG
-#endif
-
 #include <cmath>
-#include <numeric>
-#include <cassert>
+#include <cstdio>
 #include <vector>
-#include <algorithm>
 
 static void dump(const llama_token_data_array * candidates) {
     for (size_t i = 0; i < candidates->size; i++) {
diff --git a/tests/test-tokenizer-0-falcon.cpp b/tests/test-tokenizer-0-falcon.cpp
index a4e9d2b912728..d2f983baa5402 100644
--- a/tests/test-tokenizer-0-falcon.cpp
+++ b/tests/test-tokenizer-0-falcon.cpp
@@ -1,12 +1,14 @@
-#include "llama.h"
 #include "common.h"
 #include "console.h"
+#include "llama.h"
 
 #include <cstdio>
-#include <string>
+#include <fstream>
+#include <iterator>
 #include <map>
+#include <string>
+#include <utility>
 #include <vector>
-#include <fstream>
 
 // generate using test-tokenizer-0-falcon.py
 static const std::map<std::string, std::vector<llama_token>> & k_tests() {
diff --git a/tests/test-tokenizer-0-llama.cpp b/tests/test-tokenizer-0-llama.cpp
index 39c8d188c9086..c00a668de95f4 100644
--- a/tests/test-tokenizer-0-llama.cpp
+++ b/tests/test-tokenizer-0-llama.cpp
@@ -1,12 +1,14 @@
-#include "llama.h"
 #include "common.h"
 #include "console.h"
+#include "llama.h"
 
 #include <cstdio>
-#include <string>
+#include <fstream>
+#include <iterator>
 #include <map>
+#include <string>
+#include <utility>
 #include <vector>
-#include <fstream>
 
 // generate using test-tokenizer-0-llama.py
 static const std::map<std::string, std::vector<llama_token>> & k_tests() {
diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp
index 386530f23f92c..cc924da5ce56d 100644
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -1,16 +1,14 @@
-#include "llama.h"
 #include "common.h"
-#include "unicode.h"
 #include "console.h"
+#include "ggml.h"
+#include "llama.h"
+#include "unicode.h"
 
-#include <cassert>
+#include <cstdint>
 #include <cstdio>
-#include <cstring>
+#include <stdexcept>
 #include <string>
-#include <codecvt>
-#include <map>
 #include <vector>
-#include <locale>
 
 int main(int argc, char **argv) {
     if (argc < 2) {
diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp
index 4b58fe4954cf3..95832f899cafc 100644
--- a/tests/test-tokenizer-1-llama.cpp
+++ b/tests/test-tokenizer-1-llama.cpp
@@ -1,16 +1,13 @@
-#include "llama.h"
 #include "common.h"
-#include "unicode.h"
 #include "console.h"
+#include "ggml.h"
+#include "llama.h"
+#include "unicode.h"
 
-#include <cassert>
+#include <cstdint>
 #include <cstdio>
-#include <cstring>
 #include <string>
-#include <codecvt>
-#include <map>
 #include <vector>
-#include <locale>
 
 int main(int argc, char **argv) {
     if (argc < 2) {

From a9162dd01feff4fab6881ce327bdce2f0f114cef Mon Sep 17 00:00:00 2001
From: cebtenzzre <cebtenzzre@gmail.com>
Date: Thu, 2 Nov 2023 17:54:57 -0400
Subject: [PATCH 3/5] make : remove unneeded deps and add test-rope target

---
 .gitignore | 25 +++++++++++++------------
 Makefile   | 23 +++++++++++++----------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index 50cbd0b47cae3..9556fec9b7f8f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -86,15 +86,16 @@ poetry.lock
 poetry.toml
 
 # Test binaries
-tests/test-grammar-parser
-tests/test-llama-grammar
-tests/test-double-float
-tests/test-grad0
-tests/test-opt
-tests/test-quantize-fns
-tests/test-quantize-perf
-tests/test-sampling
-tests/test-tokenizer-0-llama
-tests/test-tokenizer-0-falcon
-tests/test-tokenizer-1-llama
-tests/test-tokenizer-1-bpe
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-sampling
+/tests/test-tokenizer-0-llama
+/tests/test-tokenizer-0-falcon
+/tests/test-tokenizer-1-llama
+/tests/test-tokenizer-1-bpe
+/tests/test-rope
diff --git a/Makefile b/Makefile
index 300c1e6c7e127..c8a82a8573d0e 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ BUILD_TARGETS = \
 TEST_TARGETS = \
 	tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
 	tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama          \
-	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
+	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope
 
 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -629,7 +629,7 @@ beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS)
 finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-export-lora: examples/export-lora/export-lora.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
@@ -679,28 +679,28 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-double-float: tests/test-double-float.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-grad0: tests/test-grad0.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-opt: tests/test-opt.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
@@ -715,5 +715,8 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
 tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 tests/test-c.o: tests/test-c.c llama.h
 	$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@

From f30b4e69d1668971dd00bae361f419a7b598b798 Mon Sep 17 00:00:00 2001
From: cebtenzzre <cebtenzzre@gmail.com>
Date: Thu, 2 Nov 2023 18:01:13 -0400
Subject: [PATCH 4/5] fix C includes in C++ source files

---
 ggml-opencl.cpp | 12 +++++-------
 llama.cpp       |  1 -
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 202bcb4853893..496f9cdca542d 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -1,20 +1,18 @@
+#include "ggml.h"
 #include "ggml-opencl.h"
 
 #include <array>
 #include <atomic>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
 #include <sstream>
 #include <vector>
-#include <limits>
 
 #define CL_TARGET_OPENCL_VERSION 110
 #include <clblast.h>
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "ggml.h"
-
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
diff --git a/llama.cpp b/llama.cpp
index 93cdc98247570..a6353fcec6581 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -46,7 +46,6 @@
     #endif
     #include <windows.h>
     #include <io.h>
-    #include <stdio.h> // for _fseeki64
 #endif
 
 #include <algorithm>

From f595b697980223699c6c881711182bfe8451d4ee Mon Sep 17 00:00:00 2001
From: Jared Van Bortel <jared@nomic.ai>
Date: Thu, 30 Nov 2023 17:11:31 -0500
Subject: [PATCH 5/5] Revert "fix includes with help from include-what-you-use"

This reverts commit 635e9fadfd516d4604a0fecf4a854bfb25ad17ae.
---
 common/common.cpp                             | 18 +++++--------
 common/common.h                               | 11 ++++----
 common/console.cpp                            | 11 ++++----
 common/grammar-parser.cpp                     |  1 +
 common/grammar-parser.h                       |  7 ++---
 common/sampling.cpp                           |  6 -----
 common/sampling.h                             |  8 +++---
 common/train.cpp                              | 14 ++--------
 common/train.h                                |  9 +++----
 examples/baby-llama/baby-llama.cpp            |  6 ++---
 examples/batched-bench/batched-bench.cpp      |  5 ++--
 examples/batched/batched.cpp                  |  4 +--
 examples/beam-search/beam-search.cpp          | 19 +++++++++++--
 examples/benchmark/benchmark-matmult.cpp      | 15 ++++++++---
 .../convert-llama2c-to-ggml.cpp               | 19 ++++++-------
 examples/embedding/embedding.cpp              |  5 ----
 examples/export-lora/export-lora.cpp          |  9 ++-----
 examples/finetune/finetune.cpp                | 20 +++++++-------
 examples/infill/infill.cpp                    | 12 +++++----
 examples/llama-bench/llama-bench.cpp          | 14 ++++------
 examples/llava/clip.cpp                       | 16 +++++------
 examples/llava/clip.h                         |  3 +--
 examples/llava/llava.cpp                      |  7 ++---
 examples/main/main.cpp                        | 11 ++++----
 examples/parallel/parallel.cpp                | 11 ++------
 examples/perplexity/perplexity.cpp            |  9 +------
 examples/quantize-stats/quantize-stats.cpp    |  9 +++----
 examples/quantize/quantize.cpp                |  6 +----
 examples/save-load-state/save-load-state.cpp  |  6 ++---
 examples/server/server.cpp                    | 27 +++----------------
 examples/simple/simple.cpp                    |  3 ++-
 examples/speculative/speculative.cpp          |  6 +----
 .../train-text-from-scratch.cpp               | 21 ++++++++-------
 ggml-alloc.c                                  |  3 +--
 ggml-alloc.h                                  |  3 ---
 ggml-backend.c                                |  3 +++
 ggml-backend.h                                |  3 ---
 ggml-impl.h                                   |  4 +--
 ggml-quants.c                                 |  5 ++--
 ggml-quants.h                                 |  6 ++---
 ggml.c                                        | 10 ++++---
 ggml.h                                        |  1 +
 llama.cpp                                     |  9 ++-----
 llama.h                                       |  5 +---
 pocs/vdot/q8dot.cpp                           | 19 +++++++------
 pocs/vdot/vdot.cpp                            | 19 +++++++------
 tests/test-grad0.cpp                          |  3 +--
 tests/test-grammar-parser.cpp                 |  8 +-----
 tests/test-llama-grammar.cpp                  |  6 -----
 tests/test-quantize-fns.cpp                   |  7 ++---
 tests/test-quantize-perf.cpp                  |  8 +++---
 tests/test-rope.cpp                           |  3 +--
 tests/test-sampling.cpp                       |  8 +++++-
 tests/test-tokenizer-0-falcon.cpp             |  8 +++---
 tests/test-tokenizer-0-llama.cpp              |  8 +++---
 tests/test-tokenizer-1-bpe.cpp                | 12 +++++----
 tests/test-tokenizer-1-llama.cpp              | 11 +++++---
 57 files changed, 222 insertions(+), 298 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 35b22de0feb73..e938dee165d9d 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1,28 +1,20 @@
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
-#include "log.h"
-#include "sampling.h"
 
 #include <algorithm>
-#include <cctype>
-#include <chrono>
-#include <cinttypes>
+#include <cassert>
 #include <cmath>
-#include <cstdlib>
+#include <cstring>
 #include <ctime>
-#include <exception>
 #include <fstream>
 #include <iterator>
+#include <iostream>
 #include <regex>
 #include <sstream>
-#include <stdexcept>
 #include <string>
-#include <thread>
-#include <unordered_map>
 #include <unordered_set>
-#include <utility>
 #include <vector>
+#include <cinttypes>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
@@ -40,7 +32,9 @@
 #include <fcntl.h>
 #include <io.h>
 #else
+#include <sys/ioctl.h>
 #include <sys/stat.h>
+#include <unistd.h>
 #endif
 
 #if defined(_MSC_VER)
diff --git a/common/common.h b/common/common.h
index c763be8b06e0a..72a49b8901f26 100644
--- a/common/common.h
+++ b/common/common.h
@@ -3,18 +3,19 @@
 #pragma once
 
 #include "llama.h"
+
 #include "sampling.h"
 
 #define LOG_NO_FILE_LINE_FUNCTION
 #include "log.h"
 
 #include <cmath>
-#include <cstdint>
-#include <cstdio>
-#include <random>
 #include <string>
-#include <tuple>
 #include <vector>
+#include <random>
+#include <thread>
+#include <unordered_map>
+#include <tuple>
 
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -67,7 +68,7 @@ struct gpt_params {
     int32_t yarn_orig_ctx                   = 0;    // YaRN original context length
     int8_t  rope_scaling_type               = LLAMA_ROPE_SCALING_UNSPECIFIED;
 
-    // sampling parameters
+    // // sampling parameters
     struct llama_sampling_params sparams;
 
     std::string model             = "models/7B/ggml-model-f16.gguf"; // model path
diff --git a/common/console.cpp b/common/console.cpp
index 69f1419f18ec5..f65cbc6eda0b1 100644
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -14,13 +14,14 @@
 #define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
 #endif
 #else
-#include <clocale>
-#include <cstdio>
-#include <cwchar>
-
+#include <climits>
 #include <sys/ioctl.h>
-#include <termios.h>
 #include <unistd.h>
+#include <wchar.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <termios.h>
 #endif
 
 #define ANSI_COLOR_RED     "\x1b[31m"
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp
index ef56e909dd232..ff51cc8034c8b 100644
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -1,5 +1,6 @@
 #include "grammar-parser.h"
 #include <cstdint>
+#include <cwchar>
 #include <string>
 #include <utility>
 #include <stdexcept>
diff --git a/common/grammar-parser.h b/common/grammar-parser.h
index b603764b398bd..9037d72728a42 100644
--- a/common/grammar-parser.h
+++ b/common/grammar-parser.h
@@ -10,14 +10,11 @@
 // space ::= [ \t\n]*
 
 #pragma once
-
 #include "llama.h"
-
-#include <cstdint>
-#include <cstdio>
+#include <vector>
 #include <map>
+#include <cstdint>
 #include <string>
-#include <vector>
 
 namespace grammar_parser {
     struct parse_state {
diff --git a/common/sampling.cpp b/common/sampling.cpp
index a5b684ee31571..1317024c2c11c 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -1,11 +1,5 @@
-#include "common.h"
 #include "sampling.h"
 
-#include <algorithm>
-#include <cstdio>
-#include <map>
-#include <utility>
-
 struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
     struct llama_sampling_context * result = new llama_sampling_context();
 
diff --git a/common/sampling.h b/common/sampling.h
index 03909efbc9c3d..7c9b8dcf23bcb 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -1,12 +1,12 @@
 #pragma once
 
-#include "grammar-parser.h"
 #include "llama.h"
 
-#include <cstdint>
+#include "grammar-parser.h"
+
 #include <string>
-#include <unordered_map>
 #include <vector>
+#include <unordered_map>
 
 // sampling parameters
 typedef struct llama_sampling_params {
@@ -56,6 +56,8 @@ struct llama_sampling_context {
     std::vector<llama_token_data> cur;
 };
 
+#include "common.h"
+
 // Create a new sampling context instance.
 struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params);
 
diff --git a/common/train.cpp b/common/train.cpp
index d1f5505cb23db..bc15b7a03c0cd 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -1,19 +1,9 @@
-#include "common.h"
-#include "ggml.h"
-#include "llama.h"
 #include "train.h"
+#include "common.h"
 
-#include <algorithm>
-#include <cerrno>
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <functional>
-#include <locale>
 #include <random>
 #include <sstream>
+#include <functional>
 
 struct random_normal_distribution {
     std::mt19937 gen;
diff --git a/common/train.h b/common/train.h
index ccac6b7d6bdcc..d86c93cc4f147 100644
--- a/common/train.h
+++ b/common/train.h
@@ -2,14 +2,13 @@
 
 #pragma once
 
-#include "llama.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <random>
 #include <string>
+#include <random>
 #include <vector>
 
+#include "ggml.h"
+#include "llama.h"
+
 typedef std::string mt19937_state;
 
 struct train_state {
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 35404f9a3aead..8155101d0ab93 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1,13 +1,11 @@
 #include "ggml.h"
 #include "train.h"
 
-#include <algorithm>
+#include <vector>
 #include <cassert>
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <random>
 #include <vector>
 
 #if defined(_MSC_VER)
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 955d815cc39bf..533c55c17aad1 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -1,11 +1,10 @@
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
 
 #include <algorithm>
-#include <cstdint>
+#include <cmath>
 #include <cstdio>
-#include <cstdlib>
+#include <string>
 #include <vector>
 
 // mutates the input string
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index f918727983c3d..22a4265df77c0 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -1,11 +1,9 @@
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
 
 #include <algorithm>
-#include <cstdint>
+#include <cmath>
 #include <cstdio>
-#include <cstdlib>
 #include <string>
 #include <vector>
 
diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp
index cce2b9916ba7f..679b382e19b4e 100644
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@@ -1,14 +1,29 @@
 #include "common.h"
 #include "llama.h"
 
-#include <algorithm>
 #include <cassert>
+#include <cinttypes>
+#include <cmath>
 #include <cstdio>
+#include <cstring>
+#include <ctime>
+#include <fstream>
 #include <iostream>
 #include <string>
-#include <tuple>
 #include <vector>
 
+#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
+#include <signal.h>
+#include <unistd.h>
+#elif defined (_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#   define NOMINMAX
+#endif
+#include <windows.h>
+#include <signal.h>
+#endif
+
 // Used for debugging to print out beam tokens.
 struct ostream_beam_view {
     llama_context * ctx;
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 58bf9a814a579..76e3f57ccce8e 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -1,11 +1,20 @@
 #include "common.h"
 #include "ggml.h"
 
-#include <cinttypes>
+#include <locale.h>
+#include <assert.h>
+#include <math.h>
+#include <cstring>
 #include <cstdio>
-#include <cstdlib>
+#include <cinttypes>
+#include <unordered_map>
+#include <queue>
+#include <string.h>
+#include <cassert>
+#include <fstream>
 #include <string>
-#include <vector>
+#include <iterator>
+#include <algorithm>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 7ecc15cb5791a..cae3bf3c3dc65 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,22 +1,19 @@
-#include "common.h"
 #include "ggml.h"
 #include "llama.h"
+#include "common.h"
 
-#include <algorithm>
+#include <unordered_map>
+#include <vector>
 #include <cassert>
-#include <cerrno>
 #include <climits>
-#include <cmath>
-#include <cstdarg>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
 #include <cstring>
+#include <cstdarg>
+#include <ctime>
+#include <random>
+#include <stdexcept>
 #include <sstream>
+#include <algorithm>
 #include <string>
-#include <unordered_map>
-#include <utility>
-#include <vector>
 
 // GGUF keys & tensor names.
 
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 86f874a526e0a..3295cd2400ac3 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -1,12 +1,7 @@
 #include "common.h"
 #include "llama.h"
 
-#include <algorithm>
-#include <cstdio>
 #include <ctime>
-#include <random>
-#include <tuple>
-#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index a2406de5ab28b..d803cfd5cb2d5 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -1,16 +1,11 @@
+
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
 
-#include <algorithm>
-#include <cerrno>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
+#include <vector>
 #include <string>
 #include <thread>
-#include <vector>
 
 static const size_t tensor_alignment = 32;
 
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 710ddba96051d..649a3b7c1941e 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -1,19 +1,19 @@
-#include "common.h"
-#include "ggml-alloc.h"
 #include "ggml.h"
+#include "ggml-alloc.h"
 #include "llama.h"
+#include "common.h"
 #include "train.h"
-
-#include <algorithm>
-#include <cerrno>
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
+#include <unordered_map>
+#include <vector>
+#include <cassert>
+#include <climits>
 #include <cstring>
+#include <cstdarg>
 #include <ctime>
+#include <random>
+#include <stdexcept>
+#include <algorithm>
 #include <string>
-#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index f9caffacb7a4f..62f5ce3c16a32 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -1,17 +1,19 @@
 #include "common.h"
+
 #include "console.h"
 #include "llama.h"
-#include "sampling.h"
+#include "grammar-parser.h"
 
-#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cmath>
 #include <cstdio>
-#include <cstdlib>
+#include <cstring>
 #include <ctime>
 #include <fstream>
-#include <random>
+#include <iostream>
 #include <sstream>
 #include <string>
-#include <tuple>
 #include <vector>
 
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 0df6c4c45757f..9bd82d565834a 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1,30 +1,26 @@
-#include "ggml.h"
-#include "llama.h"
-#include "common.h"
-#include "ggml-cuda.h"
-
 #include <algorithm>
 #include <array>
 #include <cassert>
-#include <cctype>
 #include <chrono>
 #include <cinttypes>
 #include <clocale>
 #include <cmath>
 #include <cstdio>
-#include <cstdlib>
 #include <cstring>
 #include <ctime>
 #include <iterator>
 #include <map>
-#include <memory>
 #include <numeric>
 #include <regex>
 #include <sstream>
 #include <string>
-#include <utility>
 #include <vector>
 
+#include "ggml.h"
+#include "llama.h"
+#include "common.h"
+#include "ggml-cuda.h"
+
 // utils
 static uint64_t get_time_ns() {
     using clock = std::chrono::high_resolution_clock;
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 03a8e9c463ef2..61932e659543c 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2,23 +2,21 @@
 // so there might be still unnecessary artifacts hanging around
 // I'll gradually clean and extend it
 
-#include "clip.h"
-#include "ggml-alloc.h"
-#include "ggml.h"
-
-#include <algorithm>
-#include <climits>
+#include <cassert>
 #include <cmath>
-#include <cstdarg>
-#include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include <fstream>
+#include <iostream>
+#include <map>
 #include <regex>
 #include <stdexcept>
-#include <string>
 #include <vector>
 
+#include "clip.h"
+#include "ggml.h"
+#include "ggml-alloc.h"
+
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
diff --git a/examples/llava/clip.h b/examples/llava/clip.h
index 106df0d1a6225..3d7261e299a35 100644
--- a/examples/llava/clip.h
+++ b/examples/llava/clip.h
@@ -1,8 +1,7 @@
 #ifndef CLIP_H
 #define CLIP_H
 
-#include <stddef.h>
-#include <stdint.h>
+#include "ggml.h"
 
 struct clip_ctx;
 
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 2c5a863988f73..f0974d5bcf452 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -1,14 +1,11 @@
 #include "clip.h"
+#include "llava-utils.h"
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
-#include "llava-utils.h"
 
-#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
-#include <cstring>
-#include <string>
+#include <vector>
 
 static void show_additional_info(int /*argc*/, char ** argv) {
     printf("\n example usage: %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index ce68efae44af9..8d985c82ac21a 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -1,17 +1,18 @@
 #include "common.h"
+
 #include "console.h"
 #include "llama.h"
-#include "sampling.h"
 
-#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cmath>
 #include <cstdio>
-#include <cstdlib>
+#include <cstring>
 #include <ctime>
 #include <fstream>
-#include <random>
+#include <iostream>
 #include <sstream>
 #include <string>
-#include <tuple>
 #include <vector>
 
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index 9b107e5f1aba3..a78df305f415c 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -2,20 +2,13 @@
 // The clients submite requests to the server and they are processed in parallel.
 
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
-#include "sampling.h"
 
-#include <algorithm>
-#include <cctype>
-#include <cstdint>
+#include <cmath>
 #include <cstdio>
-#include <cstdlib>
-#include <ctime>
-#include <istream>
 #include <string>
-#include <tuple>
 #include <vector>
+#include <ctime>
 
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index f8e8f8fc5c442..de60c5227f7c1 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -1,20 +1,13 @@
 #include "common.h"
 #include "llama.h"
 
-#include <algorithm>
-#include <chrono>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
-#include <iterator>
-#include <mutex>
-#include <random>
 #include <sstream>
-#include <string>
 #include <thread>
-#include <tuple>
-#include <utility>
+#include <mutex>
 #include <vector>
 
 #if defined(_MSC_VER)
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index d5c3c35919821..2712824774ae7 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -8,16 +8,15 @@
 #include <cinttypes>
 #include <cmath>
 #include <cstdio>
-#include <cstdlib>
 #include <cstring>
-#include <iterator>
-#include <mutex>
+#include <map>
 #include <numeric>
 #include <regex>
 #include <string>
-#include <thread>
-#include <utility>
+#include <unordered_map>
 #include <vector>
+#include <thread>
+#include <mutex>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index ce6863a2b3169..d27ea5e9132fd 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -1,14 +1,10 @@
 #include "common.h"
 #include "llama.h"
 
-#include <cctype>
-#include <cstdint>
 #include <cstdio>
-#include <cstdlib>
 #include <cstring>
-#include <exception>
-#include <string>
 #include <vector>
+#include <string>
 
 struct quant_option {
     std::string name;
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 622101449a53a..48d80111010df 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -1,11 +1,9 @@
 #include "common.h"
 #include "llama.h"
 
-#include <cstdint>
-#include <cstdio>
-#include <string>
-#include <tuple>
 #include <vector>
+#include <cstdio>
+#include <chrono>
 
 int main(int argc, char ** argv) {
     gpt_params params;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 0da14c9b70517..fd755327a511d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1,7 +1,6 @@
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
-#include "sampling.h"
+#include "grammar-parser.h"
 
 #include "../llava/clip.h"
 
@@ -21,28 +20,10 @@
 #include "completion.js.hpp"
 #include "json-schema-to-grammar.mjs.hpp"
 
-#include <algorithm>
-#include <cctype>
-#include <chrono>
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <ctime>
-#include <exception>
-#include <functional>
-#include <istream>
-#include <iterator>
-#include <mutex>
-#include <numeric>
-#include <stdexcept>
-#include <string>
+#include <cstddef>
 #include <thread>
-#include <tuple>
-#include <unordered_map>
-#include <utility>
-#include <vector>
+#include <mutex>
+#include <chrono>
 
 #ifndef SERVER_VERBOSE
 #define SERVER_VERBOSE 1
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 52b0de48f5e7f..374aef6f16189 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -1,8 +1,9 @@
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
 
+#include <cmath>
 #include <cstdio>
+#include <string>
 #include <vector>
 
 int main(int argc, char ** argv) {
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 3e65d7e6e2872..798684f66678e 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -1,13 +1,9 @@
 #include "common.h"
-#include "ggml.h"
 #include "llama.h"
-#include "sampling.h"
 
-#include <algorithm>
+#include <cmath>
 #include <cstdio>
-#include <cstring>
 #include <string>
-#include <tuple>
 #include <vector>
 
 #define SPEC_VOCAB_MAX_SIZE_DIFFERENCE  100
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 42e5c6b0fbc62..2a257e63215e3 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -1,18 +1,19 @@
-#include "common.h"
-#include "ggml-alloc.h"
 #include "ggml.h"
-#include "llama.h"
+#include "ggml-alloc.h"
+#include "common.h"
 #include "train.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
+#include "llama.h"
+#include <unordered_map>
+#include <vector>
+#include <cassert>
+#include <climits>
 #include <cstring>
+#include <cstdarg>
 #include <ctime>
+#include <random>
+#include <stdexcept>
+#include <algorithm>
 #include <string>
-#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/ggml-alloc.c b/ggml-alloc.c
index c9fd6e54d91b1..34eba3f830e84 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -1,9 +1,8 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 #include "ggml.h"
-
 #include <assert.h>
-#include <stdint.h>
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/ggml-alloc.h b/ggml-alloc.h
index 4ebba6a6b3d57..e38758878b91a 100644
--- a/ggml-alloc.h
+++ b/ggml-alloc.h
@@ -2,9 +2,6 @@
 
 #include "ggml.h"
 
-#include <stdbool.h>
-#include <stddef.h>
-
 #ifdef  __cplusplus
 extern "C" {
 #endif
diff --git a/ggml-backend.c b/ggml-backend.c
index 89af304d3cb07..ca8d83dafe47c 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1,5 +1,8 @@
 #include "ggml-backend.h"
+#include "ggml-alloc.h"
 
+#include <assert.h>
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/ggml-backend.h b/ggml-backend.h
index 12618036bdb10..da134b0dbed51 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -2,9 +2,6 @@
 
 #include "ggml.h"
 
-#include <stdbool.h>
-#include <stddef.h>
-
 #ifdef  __cplusplus
 extern "C" {
 #endif
diff --git a/ggml-impl.h b/ggml-impl.h
index 8a9fb73884c82..5ec18a50c8da5 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -1,9 +1,9 @@
 #pragma once
 
-// GGML internal header
-
 #include "ggml.h"
 
+// GGML internal header
+
 #include <assert.h>
 #include <stddef.h>
 #include <stdbool.h>
diff --git a/ggml-quants.c b/ggml-quants.c
index 39f2c27b3c3c6..740be6dc5c798 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1,11 +1,10 @@
 #include "ggml-quants.h"
 #include "ggml-impl.h"
 
-#include <assert.h>
-#include <float.h>
 #include <math.h>
-#include <stdbool.h>
 #include <string.h>
+#include <assert.h>
+#include <float.h>
 
 #ifdef __ARM_NEON
 
diff --git a/ggml-quants.h b/ggml-quants.h
index f782d54c8e6d9..70c12c27465e8 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -1,11 +1,11 @@
 #pragma once
 
-// GGML internal header
+#include "ggml-impl.h"
 
-#include "ggml.h"
+// GGML internal header
 
-#include <assert.h>
 #include <stdint.h>
+#include <stddef.h>
 
 #define QK4_0 32
 typedef struct {
diff --git a/ggml.c b/ggml.c
index fca45ad14d5d3..605a27940fc81 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1,7 +1,6 @@
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
 
-#include "ggml.h"
 #include "ggml-impl.h"
 #include "ggml-quants.h"
 
@@ -21,7 +20,9 @@
 #include <inttypes.h>
 #include <stdio.h>
 #include <float.h>
+#include <limits.h>
 #include <stdarg.h>
+#include <signal.h>
 
 #ifdef GGML_USE_METAL
 #include <unistd.h>
@@ -84,14 +85,15 @@ static int sched_yield (void) {
     return 0;
 }
 #else
-
 #include <pthread.h>
-#include <sched.h>
 #include <stdatomic.h>
-#include <sys/stat.h>
 
 typedef void * thread_ret_t;
 
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
 #endif
 
 #ifdef GGML_USE_CPU_HBM
diff --git a/ggml.h b/ggml.h
index 5b27b7ad207fd..70eb25a6bf3af 100644
--- a/ggml.h
+++ b/ggml.h
@@ -300,6 +300,7 @@ extern "C" {
     GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
     GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
 
+    struct ggml_object;
     struct ggml_context;
 
     enum ggml_type {
diff --git a/llama.cpp b/llama.cpp
index a6353fcec6581..518aa5b98dc39 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -51,34 +51,29 @@
 #include <algorithm>
 #include <array>
 #include <cassert>
-#include <cerrno>
 #include <cinttypes>
 #include <climits>
 #include <cmath>
 #include <cstdarg>
+#include <cstddef>
 #include <cstdint>
 #include <cstdio>
-#include <cstdlib>
 #include <cstring>
 #include <ctime>
-#include <exception>
 #include <forward_list>
 #include <fstream>
 #include <functional>
 #include <initializer_list>
-#include <iterator>
-#include <limits>
 #include <map>
 #include <memory>
 #include <mutex>
 #include <numeric>
 #include <queue>
 #include <random>
+#include <regex>
 #include <set>
 #include <sstream>
-#include <stdexcept>
 #include <thread>
-#include <type_traits>
 #include <unordered_map>
 
 #if defined(_MSC_VER)
diff --git a/llama.h b/llama.h
index dd16407c81d2c..3f1becd761688 100644
--- a/llama.h
+++ b/llama.h
@@ -2,14 +2,12 @@
 #define LLAMA_H
 
 #include "ggml.h"
-
 #ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
 #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
 #else
 #define LLAMA_MAX_DEVICES 1
 #endif // GGML_USE_CUBLAS
-
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -761,9 +759,8 @@ extern "C" {
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL
 
-#include <string>
-#include <utility>
 #include <vector>
+#include <string>
 
 struct ggml_tensor;
 
diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp
index 05e85ee300f1c..111770d5519cb 100644
--- a/pocs/vdot/q8dot.cpp
+++ b/pocs/vdot/q8dot.cpp
@@ -1,13 +1,16 @@
-#include "ggml.h"
-
-#include <algorithm>
-#include <chrono>
-#include <cmath>
-#include <cstdint>
 #include <cstdio>
-#include <cstdlib>
-#include <random>
+#include <type_traits>
 #include <vector>
+#include <random>
+#include <chrono>
+#include <cstdlib>
+#include <cmath>
+#include <cassert>
+#include <cstring>
+#include <array>
+#include <type_traits>
+
+#include <ggml.h>
 
 constexpr int kVecSize = 1 << 16;
 
diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp
index 5b0ebb7067e08..e96372c4b7107 100644
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -1,15 +1,14 @@
-#include "ggml.h"
-
-#include <algorithm>
-#include <cassert>
-#include <chrono>
-#include <cmath>
-#include <cstdint>
 #include <cstdio>
-#include <cstdlib>
-#include <random>
-#include <utility>
 #include <vector>
+#include <random>
+#include <chrono>
+#include <cstdlib>
+#include <cmath>
+#include <cassert>
+#include <cstring>
+#include <array>
+
+#include <ggml.h>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 49a92ed23481b..0a559b27ab370 100644
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -1,11 +1,10 @@
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #include "ggml.h"
 
-#include <cassert>
 #include <cmath>
-#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cassert>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp
index b4d825b6271e2..a0b5b043df868 100644
--- a/tests/test-grammar-parser.cpp
+++ b/tests/test-grammar-parser.cpp
@@ -2,16 +2,10 @@
 #undef NDEBUG
 #endif
 
-#include "grammar-parser.h"
 #include "llama.h"
+#include "grammar-parser.h"
 
 #include <cassert>
-#include <cstdint>
-#include <cstdio>
-#include <map>
-#include <string>
-#include <utility>
-#include <vector>
 
 int main()
 {
diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp
index cbbe95bd3e6a1..73dd33dd286a5 100644
--- a/tests/test-llama-grammar.cpp
+++ b/tests/test-llama-grammar.cpp
@@ -6,12 +6,6 @@
 #include "grammar-parser.h"
 
 #include <cassert>
-#include <cstdint>
-#include <cstdio>
-#include <map>
-#include <string>
-#include <utility>
-#include <vector>
 
 int main()
 {
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 3b88b49890bd4..a2459a2867c5c 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -2,9 +2,10 @@
 
 #include "ggml.h"
 
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
+#undef NDEBUG
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
 #include <string>
 #include <vector>
 
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index b4260e54882ea..88fac0e23106b 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -2,12 +2,14 @@
 
 #include "ggml.h"
 
+#undef NDEBUG
 #include <algorithm>
-#include <cinttypes>
-#include <cmath>
-#include <cstdio>
+#include <assert.h>
 #include <functional>
+#include <inttypes.h>
+#include <math.h>
 #include <memory>
+#include <stdio.h>
 #include <string>
 #include <vector>
 
diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp
index 2fda30ccba531..26c1f42dc0e95 100644
--- a/tests/test-rope.cpp
+++ b/tests/test-rope.cpp
@@ -1,10 +1,9 @@
 #include "ggml.h"
 
-#include <cassert>
 #include <cmath>
-#include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cassert>
 #include <vector>
 
 #if defined(_MSC_VER)
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 63ae06d309452..32e58941c0ee0 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -1,9 +1,15 @@
 #include "ggml.h"
 #include "llama.h"
 
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
 #include <cmath>
-#include <cstdio>
+#include <numeric>
+#include <cassert>
 #include <vector>
+#include <algorithm>
 
 static void dump(const llama_token_data_array * candidates) {
     for (size_t i = 0; i < candidates->size; i++) {
diff --git a/tests/test-tokenizer-0-falcon.cpp b/tests/test-tokenizer-0-falcon.cpp
index d2f983baa5402..a4e9d2b912728 100644
--- a/tests/test-tokenizer-0-falcon.cpp
+++ b/tests/test-tokenizer-0-falcon.cpp
@@ -1,14 +1,12 @@
+#include "llama.h"
 #include "common.h"
 #include "console.h"
-#include "llama.h"
 
 #include <cstdio>
-#include <fstream>
-#include <iterator>
-#include <map>
 #include <string>
-#include <utility>
+#include <map>
 #include <vector>
+#include <fstream>
 
 // generate using test-tokenizer-0-falcon.py
 static const std::map<std::string, std::vector<llama_token>> & k_tests() {
diff --git a/tests/test-tokenizer-0-llama.cpp b/tests/test-tokenizer-0-llama.cpp
index c00a668de95f4..39c8d188c9086 100644
--- a/tests/test-tokenizer-0-llama.cpp
+++ b/tests/test-tokenizer-0-llama.cpp
@@ -1,14 +1,12 @@
+#include "llama.h"
 #include "common.h"
 #include "console.h"
-#include "llama.h"
 
 #include <cstdio>
-#include <fstream>
-#include <iterator>
-#include <map>
 #include <string>
-#include <utility>
+#include <map>
 #include <vector>
+#include <fstream>
 
 // generate using test-tokenizer-0-llama.py
 static const std::map<std::string, std::vector<llama_token>> & k_tests() {
diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp
index cc924da5ce56d..386530f23f92c 100644
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -1,14 +1,16 @@
-#include "common.h"
-#include "console.h"
-#include "ggml.h"
 #include "llama.h"
+#include "common.h"
 #include "unicode.h"
+#include "console.h"
 
-#include <cstdint>
+#include <cassert>
 #include <cstdio>
-#include <stdexcept>
+#include <cstring>
 #include <string>
+#include <codecvt>
+#include <map>
 #include <vector>
+#include <locale>
 
 int main(int argc, char **argv) {
     if (argc < 2) {
diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp
index 95832f899cafc..4b58fe4954cf3 100644
--- a/tests/test-tokenizer-1-llama.cpp
+++ b/tests/test-tokenizer-1-llama.cpp
@@ -1,13 +1,16 @@
-#include "common.h"
-#include "console.h"
-#include "ggml.h"
 #include "llama.h"
+#include "common.h"
 #include "unicode.h"
+#include "console.h"
 
-#include <cstdint>
+#include <cassert>
 #include <cstdio>
+#include <cstring>
 #include <string>
+#include <codecvt>
+#include <map>
 #include <vector>
+#include <locale>
 
 int main(int argc, char **argv) {
     if (argc < 2) {