
feat: Sync with llama.cpp
Add `no_perf` field to `llama_context_params` to optionally disable performance timing measurements.
shakalaca committed Jan 18, 2025
1 parent 0580cf2 commit 83ed554
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions llama_cpp/llama_cpp.py
@@ -782,6 +782,7 @@ class llama_context_params(ctypes.Structure):
embeddings (bool): if true, extract embeddings (together with logits)
offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
flash_attn (bool): whether to use flash attention
no_perf (bool): whether to skip (disable) performance timing measurements
abort_callback (ggml_abort_callback): abort callback; if it returns true, execution of llama_decode() will be aborted
abort_callback_data (ctypes.c_void_p): data for abort_callback
"""
@@ -812,6 +813,7 @@ class llama_context_params(ctypes.Structure):
embeddings: bool
offload_kqv: bool
flash_attn: bool
no_perf: bool
abort_callback: Callable[[ctypes.c_void_p], bool]
abort_callback_data: ctypes.c_void_p

@@ -841,6 +843,7 @@ class llama_context_params(ctypes.Structure):
("embeddings", ctypes.c_bool),
("offload_kqv", ctypes.c_bool),
("flash_attn", ctypes.c_bool),
("no_perf", ctypes.c_bool),
("abort_callback", ggml_abort_callback),
("abort_callback_data", ctypes.c_void_p),
]
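For context, a minimal usage sketch (not part of this commit) of how the new field could be set through the low-level ctypes bindings; it assumes llama_context_default_params() is exposed by llama_cpp.py, as the other context-params helpers are:

import llama_cpp

# Sketch only: fetch default context parameters and opt out of perf timings.
params = llama_cpp.llama_context_default_params()
params.no_perf = True  # True disables performance timing measurements
# params would then be passed when creating a context, e.g. via
# llama_cpp.llama_new_context_with_model(model, params).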
