Skip to content

Commit

Permalink
attempting to add stanford changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Steven Prichard committed Apr 29, 2024
1 parent b8c1476 commit 0ea4873
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ var sources = [
"ggml-alloc.c",
"ggml-backend.c",
"ggml-quants.c",
"common/common.cpp",
"common/grammar-parser.cpp",
"common/sampling.cpp",
"common/log.cpp",
"tokenize.cpp"
]

var resources: [Resource] = []
Expand Down
37 changes: 37 additions & 0 deletions tokenize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//

#include "tokenize.h"


/// Tokenize a `String` via a given `llama_context`.
std::vector<llama_token> llama_tokenize_with_context(
const struct llama_context * ctx,
const std::string & text,
bool add_bos,
bool special) {
return llama_tokenize(ctx, text, add_bos, special);
}

/// Tokenizes a NUL-terminated `char` array with the help of the supplied `llama_context`.
///
/// The C string is copied into a `std::string` and then forwarded, together
/// with the remaining arguments, to `llama_tokenize`.
///
/// A null `text` is treated as the empty string: constructing a `std::string`
/// from a null pointer is undefined behavior, and this entry point exists for
/// callers that hand over raw C pointers.
///
/// @param ctx      Context passed through as the first argument of `llama_tokenize`.
/// @param text     NUL-terminated text to tokenize; may be null (treated as "").
/// @param add_bos  Forwarded as-is (presumably requests a leading BOS token; confirm in common/common.h).
/// @param special  Forwarded as-is (presumably enables special-token parsing; confirm in common/common.h).
/// @return Whatever token sequence `llama_tokenize` produces for these arguments.
std::vector<llama_token> llama_tokenize_with_context_from_char_array(
    const struct llama_context * ctx,
    const char* text,
    bool add_bos,
    bool special) {
    // Never feed a null pointer to the std::string constructor (undefined behavior).
    const std::string input = (text != nullptr) ? std::string(text) : std::string();
    return llama_tokenize(ctx, input, add_bos, special);
}

/// Tokenize a `String` via a given `llama_model`.
std::vector<llama_token> llama_tokenize_with_model(
const struct llama_model * model,
const std::string & text,
bool add_bos,
bool special) {
return llama_tokenize(model, text, add_bos, special);
}
38 changes: 38 additions & 0 deletions tokenize.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//

#ifndef tokenize_hpp
#define tokenize_hpp

#include <vector>
#include <string>
#include "common/common.h"


/// Tokenize a `String` via a given `llama_context`.
///
/// The definition in tokenize.cpp forwards all arguments to `llama_tokenize`.
///
/// @param ctx      Context handed to `llama_tokenize`.
/// @param text     Text to tokenize.
/// @param add_bos  Forwarded unchanged (presumably requests a leading BOS token; confirm in common/common.h).
/// @param special  Forwarded unchanged; defaults to `false` when omitted.
/// @return The token vector returned by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_context(
const struct llama_context * ctx,
const std::string & text,
bool add_bos,
bool special = false);

/// Tokenize a `char` array via a given `llama_context`.
///
/// The definition in tokenize.cpp converts `text` to a `std::string` and
/// forwards it, with the other arguments, to `llama_tokenize`.
///
/// @param ctx      Context handed to `llama_tokenize`.
/// @param text     C string to tokenize; it is read via the `std::string(const char*)` constructor,
///                 so it must be NUL-terminated.
/// @param add_bos  Forwarded unchanged (presumably requests a leading BOS token; confirm in common/common.h).
/// @param special  Forwarded unchanged; defaults to `false` when omitted.
/// @return The token vector returned by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_context_from_char_array(
const struct llama_context * ctx,
const char* text,
bool add_bos,
bool special = false);

/// Tokenize a `String` via a given `llama_model`.
///
/// The definition in tokenize.cpp forwards all arguments to `llama_tokenize`.
///
/// @param model    Model handed to `llama_tokenize`.
/// @param text     Text to tokenize.
/// @param add_bos  Forwarded unchanged (presumably requests a leading BOS token; confirm in common/common.h).
/// @param special  Forwarded unchanged; defaults to `false` when omitted.
/// @return The token vector returned by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_model(
const struct llama_model * model,
const std::string & text,
bool add_bos,
bool special = false);

#endif
21 changes: 21 additions & 0 deletions vector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//

#include "vector.h"


/// Build the `vector` of `llama_seq_id`s that serves as a buffer for batch processing.
///
/// The returned vector is not empty: it holds exactly one element, the
/// sequence id `0`.
///
/// @return A one-element vector containing `0`.
const std::vector<llama_seq_id> getLlamaSeqIdVector() {
    return std::vector<llama_seq_id>{ 0 };
}

/// Expose the contents of a C++ `vector<int>` as a plain `int` array.
///
/// No copy is made: the returned pointer refers to the vector's own storage,
/// so it is only usable while `vec` is alive and has not reallocated.
///
/// @param vec  Vector whose elements should be viewed as an array.
/// @return Pointer to the first element, as reported by `vec.data()`.
const int* vectorToIntArray(const std::vector<int>& vec) {
    const int* const first = vec.data();
    return first;
}
22 changes: 22 additions & 0 deletions vector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//

#ifndef vector_hpp
#define vector_hpp

#include <vector>
#include "common.h"


/// Create the `vector` of `llama_seq_id`s that serves as a buffer for batch processing.
///
/// Note: the definition in vector.cpp initializes the vector with `{ 0 }`, so the
/// result holds a single element `0` rather than being empty.
///
/// @return A `vector` of `llama_seq_id`s, returned by value.
const std::vector<llama_seq_id> getLlamaSeqIdVector();

/// Get `array` representation of C++ `vector`.
///
/// The definition in vector.cpp returns `vec.data()`, i.e. a pointer into the
/// vector's own storage rather than a copy; the pointer must not be used after
/// `vec` is destroyed or reallocates.
///
/// @param vec  Vector whose elements should be viewed as an array.
/// @return Pointer to the vector's underlying `int` storage.
const int* vectorToIntArray(const std::vector<int>& vec);

#endif

0 comments on commit 0ea4873

Please sign in to comment.