attempting to add stanford changes

ggml-org · Apr 29, 2024 · 0ea4873 · 0ea4873
1 parent b8c1476
commit 0ea4873
Show file tree

Hide file tree

Showing 5 changed files with 123 additions and 0 deletions.
diff --git a/Package.swift b/Package.swift
@@ -11,6 +11,11 @@ var sources = [
     "ggml-alloc.c",
     "ggml-backend.c",
     "ggml-quants.c",
+    "common/common.cpp",
+    "common/grammar-parser.cpp",
+    "common/sampling.cpp",
+    "common/log.cpp",
+    "tokenize.cpp"
 ]
 
 var resources: [Resource] = []

diff --git a/tokenize.cpp b/tokenize.cpp
@@ -0,0 +1,37 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#include "tokenize.h"
+
+
+/// Tokenize a `std::string` via a given `llama_context`: thin wrapper that forwards all four arguments unchanged to `llama_tokenize` and returns its token vector.
+std::vector<llama_token> llama_tokenize_with_context(
+     const struct llama_context * ctx,
+     const std::string & text,
+     bool add_bos,
+     bool special) {
+    return llama_tokenize(ctx, text, add_bos, special);
+}
+
+/// Tokenize a `char` array via a given `llama_context`.
+std::vector<llama_token> llama_tokenize_with_context_from_char_array(
+     const struct llama_context * ctx,
+     const char* text,
+     bool add_bos,
+     bool special) {
+    return llama_tokenize(ctx, std::string(text), add_bos, special);
+}
+
+/// Tokenize a `std::string` via a given `llama_model`: thin wrapper that forwards all four arguments unchanged to `llama_tokenize` and returns its token vector.
+std::vector<llama_token> llama_tokenize_with_model(
+     const struct llama_model * model,
+     const std::string & text,
+     bool add_bos,
+     bool special) {
+    return llama_tokenize(model, text, add_bos, special);
+}
diff --git a/tokenize.h b/tokenize.h
@@ -0,0 +1,38 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#ifndef tokenize_hpp
+#define tokenize_hpp
+
+#include <vector>
+#include <string>
+#include "common/common.h"
+
+
+/// Tokenize a `std::string` via a given `llama_context` and return the resulting tokens; `special` may be omitted and then defaults to `false`.
+std::vector<llama_token> llama_tokenize_with_context(
+     const struct llama_context * ctx,
+     const std::string & text,
+     bool add_bos,           // NOTE(review): presumably "prepend the BOS token" - confirm against llama_tokenize
+     bool special = false);  // NOTE(review): presumably "parse special tokens in text" - confirm against llama_tokenize
+
+/// Tokenize a `char` array via a given `llama_context`; `text` must be NUL-terminated because it is converted to a `std::string` before tokenizing. `special` defaults to `false`.
+std::vector<llama_token> llama_tokenize_with_context_from_char_array(
+     const struct llama_context * ctx,
+     const char* text,
+     bool add_bos,           // NOTE(review): presumably "prepend the BOS token" - confirm against llama_tokenize
+     bool special = false);  // NOTE(review): presumably "parse special tokens in text" - confirm against llama_tokenize
+
+/// Tokenize a `std::string` via a given `llama_model` (no context required) and return the resulting tokens; `special` may be omitted and then defaults to `false`.
+std::vector<llama_token> llama_tokenize_with_model(
+     const struct llama_model * model,
+     const std::string & text,
+     bool add_bos,           // NOTE(review): presumably "prepend the BOS token" - confirm against llama_tokenize
+     bool special = false);  // NOTE(review): presumably "parse special tokens in text" - confirm against llama_tokenize
+
+#endif
diff --git a/vector.cpp b/vector.cpp
@@ -0,0 +1,21 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#include "vector.h"
+
+
+/// Create a `vector` of `llama_seq_id`s holding exactly one element, the sequence id `0` (it is NOT empty), to serve as a buffer for batch processing.
+const std::vector<llama_seq_id> getLlamaSeqIdVector() {
+    // Return the one-element prvalue directly: same contents as before, without a `const` named local that cannot be moved from.
+    return std::vector<llama_seq_id>{ 0 };
+}
+
+/// Get the `array` representation of a C++ `vector`: returns `vec.data()`, a non-owning pointer into `vec`'s own storage (no copy), so it is only usable while `vec` is alive and not reallocated.
+const int* vectorToIntArray(const std::vector<int>& vec) {
+    return vec.data();
+}
diff --git a/vector.h b/vector.h
@@ -0,0 +1,22 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#ifndef vector_hpp
+#define vector_hpp
+
+#include <vector>
+#include "common.h"  // NOTE(review): tokenize.h includes this header as "common/common.h" - confirm which path the build's header search paths expect
+
+
+/// Create a `vector` of `llama_seq_id`s holding the single sequence id `0` (one element, not empty) that serves as a buffer for batch processing.
+const std::vector<llama_seq_id> getLlamaSeqIdVector();
+
+/// Get the `array` representation of a C++ `vector`: a non-owning pointer to `vec`'s elements, usable only while `vec` is alive and not reallocated.
+const int* vectorToIntArray(const std::vector<int>& vec);
+
+#endif  // vector_hpp