a brief explaining comment
This commit is contained in:
parent
de3d90dd3b
commit
54af80752c
1 changed files with 5 additions and 2 deletions
7
llama.h
7
llama.h
|
@ -252,11 +252,14 @@ extern "C" {
|
||||||
|
|
||||||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||||
bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
|
bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
|
||||||
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
||||||
bool embedding; // embedding mode only
|
bool embedding; // embedding mode only
|
||||||
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
||||||
bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
|
bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
|
||||||
|
|
||||||
|
// Abort callback
|
||||||
|
// if it returns true, execution of llama_decode() will be aborted
|
||||||
|
// currently works only with CPU execution
|
||||||
ggml_abort_callback abort_callback;
|
ggml_abort_callback abort_callback;
|
||||||
void * abort_callback_data;
|
void * abort_callback_data;
|
||||||
};
|
};
|
||||||
|
@ -667,7 +670,7 @@ extern "C" {
|
||||||
// Set abort callback
|
// Set abort callback
|
||||||
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
|
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||||
|
|
||||||
// Token logits obtained from the last call to llama_eval()
|
// Token logits obtained from the last call to llama_decode()
|
||||||
// The logits for the last token are stored in the last row
|
// The logits for the last token are stored in the last row
|
||||||
// Logits for which llama_batch.logits[i] == 0 are undefined
|
// Logits for which llama_batch.logits[i] == 0 are undefined
|
||||||
// Rows: n_tokens provided with llama_batch
|
// Rows: n_tokens provided with llama_batch
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue