llama : fix callback placement in llama_context_params
This commit is contained in:
parent
aa16b5445f
commit
0c96c72150
2 changed files with 5 additions and 5 deletions
|
@ -9265,14 +9265,14 @@ struct llama_context_params llama_context_default_params() {
|
||||||
/*.yarn_beta_fast =*/ 32.0f,
|
/*.yarn_beta_fast =*/ 32.0f,
|
||||||
/*.yarn_beta_slow =*/ 1.0f,
|
/*.yarn_beta_slow =*/ 1.0f,
|
||||||
/*.yarn_orig_ctx =*/ 0,
|
/*.yarn_orig_ctx =*/ 0,
|
||||||
|
/*.cb_eval =*/ nullptr,
|
||||||
|
/*.cb_eval_user_data =*/ nullptr,
|
||||||
/*.type_k =*/ GGML_TYPE_F16,
|
/*.type_k =*/ GGML_TYPE_F16,
|
||||||
/*.type_v =*/ GGML_TYPE_F16,
|
/*.type_v =*/ GGML_TYPE_F16,
|
||||||
/*.mul_mat_q =*/ true,
|
/*.mul_mat_q =*/ true,
|
||||||
/*.logits_all =*/ false,
|
/*.logits_all =*/ false,
|
||||||
/*.embedding =*/ false,
|
/*.embedding =*/ false,
|
||||||
/*.offload_kqv =*/ true,
|
/*.offload_kqv =*/ true,
|
||||||
/*.cb_eval =*/ nullptr,
|
|
||||||
/*.cb_eval_user_data =*/ nullptr,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
6
llama.h
6
llama.h
|
@ -232,6 +232,9 @@ extern "C" {
|
||||||
float yarn_beta_slow; // YaRN high correction dim
|
float yarn_beta_slow; // YaRN high correction dim
|
||||||
uint32_t yarn_orig_ctx; // YaRN original context size
|
uint32_t yarn_orig_ctx; // YaRN original context size
|
||||||
|
|
||||||
|
ggml_backend_sched_eval_callback cb_eval;
|
||||||
|
void * cb_eval_user_data;
|
||||||
|
|
||||||
enum ggml_type type_k; // data type for K cache
|
enum ggml_type type_k; // data type for K cache
|
||||||
enum ggml_type type_v; // data type for V cache
|
enum ggml_type type_v; // data type for V cache
|
||||||
|
|
||||||
|
@ -240,9 +243,6 @@ extern "C" {
|
||||||
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
||||||
bool embedding; // embedding mode only
|
bool embedding; // embedding mode only
|
||||||
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
||||||
|
|
||||||
ggml_backend_sched_eval_callback cb_eval;
|
|
||||||
void * cb_eval_user_data;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// model quantization parameters
|
// model quantization parameters
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue