token healing : change argument order

parent ea4abc9d82
commit b317368191

4 changed files with 9 additions and 9 deletions
@@ -128,8 +128,8 @@ static llama_token_healing_output llama_token_healing_get_prefix(
 
 llama_token_healing_output llama_token_healing_rollback(
     const llama_context * ctx_main,
-    llama_token_healing_type th_type,
     std::vector<llama_token> & tokens,
+    llama_token_healing_type th_type,
     int max_to_remove) {
     // NB. To avoid returning empty `tokens`, at least 1 token will remain in `tokens` after rolling back.
     // It is the caller's responsibility to add BOS to the start of the prompt if they want to roll back the whole prompt.
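The NB above is the function's key invariant. A minimal sketch of a rollback loop that honors it (hypothetical body, not the actual implementation, which also records the removed text so it can build the healing prefix):

    // Hypothetical sketch: stop once a single token is left, whatever
    // max_to_remove says; a negative max_to_remove is assumed to mean
    // "no explicit limit", matching the header default of -1.
    int n_removed = 0;
    while ((int) tokens.size() > 1 && (max_to_remove < 0 || n_removed < max_to_remove)) {
        tokens.pop_back();
        n_removed++;
    }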
@@ -189,8 +189,8 @@ struct llama_token_healing_output {
 // Call `llama_token_healing_set_prefix` with the returned prefix before the first sampling.
 llama_token_healing_output llama_token_healing_rollback(
     const llama_context * ctx_main,
-    llama_token_healing_type th_type,
     std::vector<llama_token> & tokens,
+    llama_token_healing_type th_type,
     int max_to_remove = -1);
 
 void llama_token_healing_set_prefix(llama_sampling_context * ctx_sampling, const std::string & prefix);
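Taken together with the header comment, the intended call sequence after this change looks roughly as follows. This is a minimal sketch assuming the new argument order; `ctx`, `ctx_sampling`, `embd_inp`, and `sparams` stand in for the caller's state:

    // Roll back the end of the tokenized prompt; `tokens` now precedes the healing type.
    llama_token_healing_output th_out = llama_token_healing_rollback(
        ctx, embd_inp, sparams.token_healing.type, sparams.token_healing.n_rollback);
    // Hand the removed text to the sampler before the first sampling call.
    llama_token_healing_set_prefix(ctx_sampling, th_out.prefix);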
@@ -297,8 +297,8 @@ int main(int argc, char ** argv) {
     }
 
     llama_token_healing_output token_healing_out{};
     if (!params.interactive_first && sparams.token_healing.enabled) {
-        token_healing_out = llama_token_healing_rollback(ctx, sparams.token_healing.type, embd_inp,
-                                                         sparams.token_healing.n_rollback);
+        token_healing_out = llama_token_healing_rollback(ctx, embd_inp,
+                                                         sparams.token_healing.type, sparams.token_healing.n_rollback);
     }
 
     // Should not run without any tokens
@@ -962,7 +962,7 @@ int main(int argc, char ** argv) {
                 const int max_to_remove = sparams.token_healing.n_rollback < 0
                     ? n_new_tokens
                     : std::min(sparams.token_healing.n_rollback, n_new_tokens);
-                token_healing_out = llama_token_healing_rollback(ctx, sparams.token_healing.type, embd_inp, max_to_remove);
+                token_healing_out = llama_token_healing_rollback(ctx, embd_inp, sparams.token_healing.type, max_to_remove);
                 n_bytes_to_skip = token_healing_out.prefix.size();
             }
 
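As the clamping above shows, a negative `n_rollback` means no user-imposed limit: with `n_rollback = -1` and `n_new_tokens = 5`, `max_to_remove` becomes 5, while `n_rollback = 2` would cap it at 2, so the rollback never removes more than the newly generated tokens.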
@@ -2100,8 +2100,8 @@ struct server_context {
 
                 if (slot.sparams.token_healing.enabled) {
                     // For FIM roll back only the prefix part (i.e. cursor location)
-                    token_healing_out = llama_token_healing_rollback(ctx, slot.sparams.token_healing.type,
-                                                                     prefix_tokens, slot.sparams.token_healing.n_rollback);
+                    token_healing_out = llama_token_healing_rollback(ctx, prefix_tokens,
+                                                                     slot.sparams.token_healing.type, slot.sparams.token_healing.n_rollback);
                 }
 
                 auto embd_inp = params.spm_infill ? suffix_tokens : prefix_tokens;
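In this fill-in-the-middle path, only `prefix_tokens` are rolled back because generation resumes at the cursor; the suffix stays intact, and `embd_inp` is then assembled from the prefix and suffix tokens according to `spm_infill`.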
@@ -2121,8 +2121,8 @@ struct server_context {
                 prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
 
                 if (slot.sparams.token_healing.enabled) {
-                    token_healing_out = llama_token_healing_rollback(ctx, slot.sparams.token_healing.type,
-                                                                     prompt_tokens, slot.sparams.token_healing.n_rollback);
+                    token_healing_out = llama_token_healing_rollback(ctx, prompt_tokens,
+                                                                     slot.sparams.token_healing.type, slot.sparams.token_healing.n_rollback);
                 }
             }
 