-
Notifications
You must be signed in to change notification settings - Fork 10.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Steven Prichard
committed
Apr 29, 2024
1 parent
b8c1476
commit 0ea4873
Showing
5 changed files
with
123 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
// | ||
// This source file is part of the Stanford Spezi open source project | ||
// | ||
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) | ||
// | ||
// SPDX-License-Identifier: MIT | ||
// | ||
|
||
#include "tokenize.h" | ||
|
||
|
||
/// Tokenize a `String` via a given `llama_context`. | ||
std::vector<llama_token> llama_tokenize_with_context( | ||
const struct llama_context * ctx, | ||
const std::string & text, | ||
bool add_bos, | ||
bool special) { | ||
return llama_tokenize(ctx, text, add_bos, special); | ||
} | ||
|
||
/// Tokenize a `char` array via a given `llama_context`. | ||
std::vector<llama_token> llama_tokenize_with_context_from_char_array( | ||
const struct llama_context * ctx, | ||
const char* text, | ||
bool add_bos, | ||
bool special) { | ||
return llama_tokenize(ctx, std::string(text), add_bos, special); | ||
} | ||
|
||
/// Tokenize a `String` via a given `llama_model`. | ||
std::vector<llama_token> llama_tokenize_with_model( | ||
const struct llama_model * model, | ||
const std::string & text, | ||
bool add_bos, | ||
bool special) { | ||
return llama_tokenize(model, text, add_bos, special); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// | ||
// This source file is part of the Stanford Spezi open source project | ||
// | ||
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) | ||
// | ||
// SPDX-License-Identifier: MIT | ||
// | ||
|
||
#ifndef tokenize_hpp | ||
#define tokenize_hpp | ||
|
||
#include <vector> | ||
#include <string> | ||
#include "common/common.h" | ||
|
||
|
||
/// Tokenize a `std::string` via a given `llama_context`. | ||
/// | ||
/// @param ctx     Context used for tokenization. | ||
/// @param text    Text to tokenize. | ||
/// @param add_bos NOTE(review): presumably requests a leading beginning-of-sequence token; confirm against `llama_tokenize`. | ||
/// @param special Defaults to `false`. NOTE(review): presumably enables special-token parsing; confirm against `llama_tokenize`. | ||
/// @return Vector of `llama_token`s for `text`. | ||
std::vector<llama_token> llama_tokenize_with_context( | ||
const struct llama_context * ctx, | ||
const std::string & text, | ||
bool add_bos, | ||
bool special = false); | ||
|
||
/// Tokenize a `char` array via a given `llama_context`. | ||
/// | ||
/// @param ctx     Context used for tokenization. | ||
/// @param text    C string to tokenize. NOTE(review): expected to be NUL-terminated, since the implementation builds a `std::string` from it. | ||
/// @param add_bos NOTE(review): presumably requests a leading beginning-of-sequence token; confirm against `llama_tokenize`. | ||
/// @param special Defaults to `false`. NOTE(review): presumably enables special-token parsing; confirm against `llama_tokenize`. | ||
/// @return Vector of `llama_token`s for `text`. | ||
std::vector<llama_token> llama_tokenize_with_context_from_char_array( | ||
const struct llama_context * ctx, | ||
const char* text, | ||
bool add_bos, | ||
bool special = false); | ||
|
||
/// Tokenize a `std::string` via a given `llama_model`. | ||
/// | ||
/// @param model   Model used for tokenization. | ||
/// @param text    Text to tokenize. | ||
/// @param add_bos NOTE(review): presumably requests a leading beginning-of-sequence token; confirm against `llama_tokenize`. | ||
/// @param special Defaults to `false`. NOTE(review): presumably enables special-token parsing; confirm against `llama_tokenize`. | ||
/// @return Vector of `llama_token`s for `text`. | ||
std::vector<llama_token> llama_tokenize_with_model( | ||
const struct llama_model * model, | ||
const std::string & text, | ||
bool add_bos, | ||
bool special = false); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// | ||
// This source file is part of the Stanford Spezi open source project | ||
// | ||
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) | ||
// | ||
// SPDX-License-Identifier: MIT | ||
// | ||
|
||
#include "vector.h" | ||
|
||
|
||
/// Create a `vector` of `llama_seq_id`s that serves as a buffer for batch processing.
///
/// The returned vector is not empty: it holds exactly one element with the value `0`.
///
/// @return A vector containing the single `llama_seq_id` `0`.
const std::vector<llama_seq_id> getLlamaSeqIdVector() {
    // Brace-initialise the return value directly; no named temporary is needed.
    return { 0 };
}
|
||
/// Expose the contiguous storage of a C++ `vector<int>` as a plain `int` array.
///
/// The pointer refers to memory owned by `vec`; it stays usable only while `vec`
/// is alive and its storage has not been reallocated.
///
/// @param vec Vector whose elements should be viewed as an array.
/// @return Pointer to the first element of `vec` (the result of `vec.data()`).
const int* vectorToIntArray(const std::vector<int>& vec) {
    const int* const firstElement = vec.data();
    return firstElement;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// | ||
// This source file is part of the Stanford Spezi open source project | ||
// | ||
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) | ||
// | ||
// SPDX-License-Identifier: MIT | ||
// | ||
|
||
#ifndef vector_hpp | ||
#define vector_hpp | ||
|
||
#include <vector> | ||
#include "common.h" | ||
|
||
|
||
/// Create a `vector` of `llama_seq_id`s that serves as a buffer for batch processing. | ||
/// | ||
/// The returned vector is not empty: the implementation initialises it with the single element `0`. | ||
/// | ||
/// @return A vector of `llama_seq_id`s, returned by value. | ||
const std::vector<llama_seq_id> getLlamaSeqIdVector(); | ||
|
||
/// Get the `array` representation of a C++ `vector`. | ||
/// | ||
/// The returned pointer refers to storage owned by `vec`; it is only usable while `vec` | ||
/// is alive and its storage has not been reallocated. | ||
/// | ||
/// @param vec Vector whose elements should be viewed as an `int` array. | ||
/// @return Pointer to the first element of `vec`. | ||
const int* vectorToIntArray(const std::vector<int>& vec); | ||
|
||
#endif |