From 54af80752c0d08ddbda83bb1a6d4843364f100fc Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Mon, 26 Feb 2024 16:20:35 +0100 Subject: [PATCH] add a brief explanatory comment --- llama.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llama.h b/llama.h index d1ebcdeaf..c47d2e35f 100644 --- a/llama.h +++ b/llama.h @@ -252,11 +252,14 @@ extern "C" { // Keep the booleans together to avoid misalignment during copy-by-value. bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true) - bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead) + bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead) bool embedding; // embedding mode only bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer) + // Abort callback + // if it returns true, execution of llama_decode() will be aborted + // currently works only with CPU execution ggml_abort_callback abort_callback; void * abort_callback_data; }; @@ -667,7 +670,7 @@ extern "C" { // Set abort callback LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); - // Token logits obtained from the last call to llama_eval() + // Token logits obtained from the last call to llama_decode() // The logits for the last token are stored in the last row // Logits for which llama_batch.logits[i] == 0 are undefined // Rows: n_tokens provided with llama_batch