Const ref pair #302

Merged
merged 11 commits into from
Aug 15, 2024
common : remove duplicate function llama_should_add_bos_token (ggml-o…
kylo5aby authored Aug 15, 2024
commit 4af8420afba39de4968adf2695ce12fd42422a13
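
This commit removes the duplicated helper llama_should_add_bos_token from common and narrows llama_add_bos_token / llama_add_eos_token in the public API from a tri-state int32_t (-1 unknown, 1 true, 0 false) to a plain bool, updating every call site in the examples. A minimal sketch of the caller-side migration, assuming `model` is any loaded `llama_model *` (the wrapper function name is illustrative, not part of the patch):

    #include "llama.h"  // also brings in GGML_ASSERT via ggml.h

    // Illustrative only: the shape of call sites after this commit.
    static bool add_bos_after_migration(const llama_model * model) {
        // was: const bool add_bos = llama_should_add_bos_token(model);
        const bool add_bos = llama_add_bos_token(model);
        // was: GGML_ASSERT(llama_add_eos_token(model) != 1); // tri-state int32_t
        GGML_ASSERT(!llama_add_eos_token(model));             // plain bool
        return add_bos;
    }
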
6 changes: 0 additions & 6 deletions common/common.cpp
@@ -2702,12 +2702,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
     return text;
 }
 
-bool llama_should_add_bos_token(const llama_model * model) {
-    const int add_bos = llama_add_bos_token(model);
-
-    return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-}
-
 //
 // Chat template utils
 //
4 changes: 0 additions & 4 deletions common/common.h
@@ -380,10 +380,6 @@ std::string llama_detokenize(
         const std::vector<llama_token> & tokens,
         bool special = true);
 
-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
-
 //
 // Chat template utils
 //
2 changes: 1 addition & 1 deletion examples/cvector-generator/cvector-generator.cpp
@@ -271,7 +271,7 @@ struct tokenized_prompt {
     size_t max_seq_len;
 
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+        const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
         tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
         tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
2 changes: 1 addition & 1 deletion examples/eval-callback/eval-callback.cpp
@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 }
 
 static bool run(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
 
     std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
 
4 changes: 2 additions & 2 deletions examples/imatrix/imatrix.cpp
@@ -433,8 +433,8 @@ static void process_logits(
 }
 
 static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
     const int n_ctx = llama_n_ctx(ctx);
 
     auto tim1 = std::chrono::high_resolution_clock::now();
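
The assertion rewrite above is the subtle part of the migration: with the old tri-state return, `x != 1` held for both 0 (false) and -1 (unknown), whereas `!x` on a bool only covers the false case, so the two forms agree only once the loader has collapsed "unknown" into a concrete default. A standalone sketch of that mapping (names and values here are illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    // Old convention: -1 = unknown, 0 = false, 1 = true.
    static bool eos_not_forced_old(int32_t add_eos) { return add_eos != 1; }
    // New convention: the vocab metadata is already a plain bool.
    static bool eos_not_forced_new(bool add_eos)    { return !add_eos; }

    int main() {
        assert(eos_not_forced_old(0) == eos_not_forced_new(false));
        assert(eos_not_forced_old(1) == eos_not_forced_new(true));
        // -1 (unknown) has no bool counterpart; it must be resolved to a
        // default before the two checks are equivalent.
        return 0;
    }
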
4 changes: 2 additions & 2 deletions examples/infill/infill.cpp
@@ -203,8 +203,8 @@ int main(int argc, char ** argv) {
LOG_TEE("\n");
LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
}
const bool add_bos = llama_should_add_bos_token(model);
GGML_ASSERT(llama_add_eos_token(model) != 1);
const bool add_bos = llama_add_bos_token(model);
GGML_ASSERT(!llama_add_eos_token(model));
LOG("add_bos: %d\n", add_bos);

std::vector<llama_token> embd_inp;
4 changes: 2 additions & 2 deletions examples/main/main.cpp
@@ -267,9 +267,9 @@ int main(int argc, char ** argv) {
         }
     }
 
-    const bool add_bos = llama_should_add_bos_token(model);
+    const bool add_bos = llama_add_bos_token(model);
     if (!llama_model_has_encoder(model)) {
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        GGML_ASSERT(!llama_add_eos_token(model));
     }
     LOG("add_bos: %d\n", add_bos);
 
12 changes: 6 additions & 6 deletions examples/perplexity/perplexity.cpp
@@ -340,8 +340,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
@@ -480,8 +480,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     std::ofstream logits_stream;
     if (!params.logits_file.empty()) {
@@ -1733,8 +1733,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
     const int n_batch = params.n_batch;
     const int num_batches = (n_ctx + n_batch - 1)/n_batch;
     const int nv = 2*((n_vocab + 1)/2) + 4;
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
     std::vector<float> kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);
7 changes: 3 additions & 4 deletions examples/server/server.cpp
@@ -693,9 +693,8 @@ struct server_context {

         n_ctx = llama_n_ctx(ctx);
 
-        add_bos_token = llama_should_add_bos_token(model);
-        has_eos_token = llama_add_eos_token(model) != 1;
-
+        add_bos_token = llama_add_bos_token(model);
+        has_eos_token = !llama_add_eos_token(model);
         return true;
     }

@@ -2038,7 +2037,7 @@ struct server_context {
         slot.t_start_generation = 0;
 
         if (slot.infill) {
-            const bool add_bos = llama_should_add_bos_token(model);
+            const bool add_bos = llama_add_bos_token(model);
             bool suff_rm_leading_spc = true;
             if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
                 params.input_suffix.erase(0, 1);
2 changes: 1 addition & 1 deletion examples/tokenize/tokenize.cpp
@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
         prompt = stdin_buffer.str();
     }
 
-    const bool model_wants_add_bos = llama_should_add_bos_token(model);
+    const bool model_wants_add_bos = llama_add_bos_token(model);
     const bool add_bos = model_wants_add_bos && !no_bos;
     const bool parse_special = !no_parse_special;
 
7 changes: 2 additions & 5 deletions include/llama.h
@@ -914,11 +914,8 @@ extern "C" {
     LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
     LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding
 
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model);
-
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_eos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_bos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_eos_token(const struct llama_model * model);
 
     // Codellama infill tokens
     LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
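
Out-of-tree code that still calls the removed common helper can shim it in one line against the new header, since the bool return leaves no "unknown" case to disambiguate. A hypothetical compatibility shim, not part of this patch:

    #include "llama.h"

    // Hypothetical drop-in replacement for the removed common helper; the
    // old SPM fallback for the -1 (unknown) case is no longer reachable.
    static inline bool llama_should_add_bos_token(const llama_model * model) {
        return llama_add_bos_token(model);
    }
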
4 changes: 2 additions & 2 deletions src/llama-vocab.cpp
@@ -1468,11 +1468,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {
     return vocab.special_pad_id;
 }
 
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_bos;
 }
 
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_eos;
 }
 
4 changes: 2 additions & 2 deletions src/llama-vocab.h
@@ -95,8 +95,8 @@ llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
 llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
 llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
 
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab);
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
 
 llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
4 changes: 2 additions & 2 deletions src/llama.cpp
@@ -18705,11 +18705,11 @@ llama_token llama_token_pad(const struct llama_model * model) {
     return llama_token_pad_impl(model->vocab);
 }
 
-int32_t llama_add_bos_token(const struct llama_model * model) {
+bool llama_add_bos_token(const struct llama_model * model) {
     return llama_add_bos_token_impl(model->vocab);
 }
 
-int32_t llama_add_eos_token(const struct llama_model * model) {
+bool llama_add_eos_token(const struct llama_model * model) {
     return llama_add_eos_token_impl(model->vocab);
 }
 