a brief explaining comment

This commit is contained in:
Michael Podvitskiy 2024-02-26 16:20:35 +01:00
parent de3d90dd3b
commit 54af80752c

View file

@ -252,11 +252,14 @@ extern "C" {
// Keep the booleans together to avoid misalignment during copy-by-value. // Keep the booleans together to avoid misalignment during copy-by-value.
bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true) bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead) bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
bool embedding; // embedding mode only bool embedding; // embedding mode only
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer) bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
// Abort callback
// if it returns true, execution of llama_decode() will be aborted
// currently works only with CPU execution
ggml_abort_callback abort_callback; ggml_abort_callback abort_callback;
void * abort_callback_data; void * abort_callback_data;
}; };
@ -667,7 +670,7 @@ extern "C" {
// Set abort callback // Set abort callback
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
// Token logits obtained from the last call to llama_eval() // Token logits obtained from the last call to llama_decode()
// The logits for the last token are stored in the last row // The logits for the last token are stored in the last row
// Logits for which llama_batch.logits[i] == 0 are undefined // Logits for which llama_batch.logits[i] == 0 are undefined
// Rows: n_tokens provided with llama_batch // Rows: n_tokens provided with llama_batch