speculative : add infill mode
ggml-ci
This commit is contained in:
parent
0eb4e12bee
commit
b83cae088c
3 changed files with 26 additions and 22 deletions
|
@@ -2315,6 +2315,7 @@ struct server_context {
|
|||
params_spec.n_draft = slot.params.speculative.n_max;
|
||||
params_spec.n_reuse = llama_n_ctx(slot.ctx_dft) - slot.params.speculative.n_max;
|
||||
params_spec.p_min = slot.params.speculative.p_min;
|
||||
params_spec.infill = slot.inf_type == SERVER_TASK_INF_TYPE_INFILL;
|
||||
|
||||
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue