llama : fix callback placement in llama_context_params

This commit is contained in:
Georgi Gerganov 2024-01-16 10:52:38 +02:00
parent aa16b5445f
commit 0c96c72150
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 5 additions and 5 deletions

View file

@@ -9265,14 +9265,14 @@ struct llama_context_params llama_context_default_params() {
/*.yarn_beta_fast =*/ 32.0f, /*.yarn_beta_fast =*/ 32.0f,
/*.yarn_beta_slow =*/ 1.0f, /*.yarn_beta_slow =*/ 1.0f,
/*.yarn_orig_ctx =*/ 0, /*.yarn_orig_ctx =*/ 0,
/*.cb_eval =*/ nullptr,
/*.cb_eval_user_data =*/ nullptr,
/*.type_k =*/ GGML_TYPE_F16, /*.type_k =*/ GGML_TYPE_F16,
/*.type_v =*/ GGML_TYPE_F16, /*.type_v =*/ GGML_TYPE_F16,
/*.mul_mat_q =*/ true, /*.mul_mat_q =*/ true,
/*.logits_all =*/ false, /*.logits_all =*/ false,
/*.embedding =*/ false, /*.embedding =*/ false,
/*.offload_kqv =*/ true, /*.offload_kqv =*/ true,
/*.cb_eval =*/ nullptr,
/*.cb_eval_user_data =*/ nullptr,
}; };
return result; return result;

View file

@@ -232,6 +232,9 @@ extern "C" {
float yarn_beta_slow; // YaRN high correction dim float yarn_beta_slow; // YaRN high correction dim
uint32_t yarn_orig_ctx; // YaRN original context size uint32_t yarn_orig_ctx; // YaRN original context size
ggml_backend_sched_eval_callback cb_eval;
void * cb_eval_user_data;
enum ggml_type type_k; // data type for K cache enum ggml_type type_k; // data type for K cache
enum ggml_type type_v; // data type for V cache enum ggml_type type_v; // data type for V cache
@@ -240,9 +243,6 @@ extern "C" {
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead) bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
bool embedding; // embedding mode only bool embedding; // embedding mode only
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
ggml_backend_sched_eval_callback cb_eval;
void * cb_eval_user_data;
}; };
// model quantization parameters // model quantization parameters