Skip to content

Commit

Permalink
lookup: add print for drafting performance (ggerganov#5450)
Browse files: browse the repository at this point in the history
  • Loading branch information
JohannesGaessler authored and hodlen committed Apr 1, 2024
1 parent 9bcf464 commit 61f17fb
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion examples/lookup/lookup.cpp
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
#include "common.h"
#include "ggml.h"
#include "llama.h"

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
Expand Down Expand Up @@ -73,6 +75,8 @@ int main(int argc, char ** argv){
int n_drafted = 0;
int n_accept = 0;

int64_t t_draft_us = 0;

int n_past = inp.size();

bool has_eos = false;
Expand Down Expand Up @@ -160,7 +164,7 @@ int main(int argc, char ** argv){

// generate n_pred tokens through prompt lookup
auto prompt_lookup = [&]() -> void {
int inp_size = inp.size();
const int inp_size = inp.size();
for (int ngram_size = ngram_max ; ngram_size > ngram_min; --ngram_size){
const llama_token * ngram = &inp[inp_size - ngram_size];

Expand Down Expand Up @@ -191,8 +195,12 @@ int main(int argc, char ** argv){
return;
};

const int64_t t_start_draft_us = ggml_time_us();

prompt_lookup();

t_draft_us += ggml_time_us() - t_start_draft_us;

llama_decode(ctx, batch_tgt);
++n_past;

Expand All @@ -210,6 +218,8 @@ int main(int argc, char ** argv){
LOG_TEE("n_draft = %d\n", n_draft);
LOG_TEE("n_predict = %d\n", n_predict);
LOG_TEE("n_drafted = %d\n", n_drafted);
LOG_TEE("t_draft = %.2f ms, %.2f us per token, %.2f tokens per second\n",
t_draft_us*1e-3, 1.0f*t_draft_us/n_drafted, n_drafted/(1e-6*t_draft_us));
LOG_TEE("n_accept = %d\n", n_accept);
LOG_TEE("accept = %.3f%%\n", 100.0f * n_accept / n_drafted);

Expand Down

0 comments on commit 61f17fb

Please sign in to comment.