From b3173681918c3ddd4611a955145bc96513ec7b63 Mon Sep 17 00:00:00 2001
From: mare5x
Date: Mon, 1 Jul 2024 12:23:21 +0200
Subject: [PATCH] token healing : change argument order

---
 common/sampling.cpp        | 2 +-
 common/sampling.h          | 2 +-
 examples/main/main.cpp     | 6 +++---
 examples/server/server.cpp | 8 ++++----
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/common/sampling.cpp b/common/sampling.cpp
index 2d1610b39..e9f828bef 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -128,8 +128,8 @@ static llama_token_healing_output llama_token_healing_get_prefix(
 
 llama_token_healing_output llama_token_healing_rollback(
         const llama_context * ctx_main,
-        llama_token_healing_type th_type,
         std::vector<llama_token> & tokens,
+        llama_token_healing_type th_type,
         int max_to_remove) {
     // NB. To avoid returning empty `tokens`, at least 1 token will remain in `tokens` after rolling back.
     // It is the caller's responsibility to add BOS to the start of the prompt if they want to roll back the whole prompt.
diff --git a/common/sampling.h b/common/sampling.h
index a269ab11e..257c2aaeb 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -189,8 +189,8 @@ struct llama_token_healing_output {
 // Call `llama_token_healing_set_prefix` with the returned prefix before the first sampling.
 llama_token_healing_output llama_token_healing_rollback(
         const llama_context * ctx_main,
-        llama_token_healing_type th_type,
         std::vector<llama_token> & tokens,
+        llama_token_healing_type th_type,
         int max_to_remove = -1);
 
 void llama_token_healing_set_prefix(llama_sampling_context * ctx_sampling, const std::string & prefix);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index e8a0eefb9..07144a7cb 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -297,8 +297,8 @@ int main(int argc, char ** argv) {
     }
     llama_token_healing_output token_healing_out{};
     if (!params.interactive_first && sparams.token_healing.enabled) {
-        token_healing_out = llama_token_healing_rollback(ctx, sparams.token_healing.type, embd_inp,
-                                                         sparams.token_healing.n_rollback);
+        token_healing_out = llama_token_healing_rollback(ctx, embd_inp,
+                                                         sparams.token_healing.type, sparams.token_healing.n_rollback);
     }
 
     // Should not run without any tokens
@@ -962,7 +962,7 @@ int main(int argc, char ** argv) {
                     const int max_to_remove = sparams.token_healing.n_rollback < 0
                                                   ? n_new_tokens
                                                   : std::min(sparams.token_healing.n_rollback, n_new_tokens);
-                    token_healing_out = llama_token_healing_rollback(ctx, sparams.token_healing.type, embd_inp, max_to_remove);
+                    token_healing_out = llama_token_healing_rollback(ctx, embd_inp, sparams.token_healing.type, max_to_remove);
                     n_bytes_to_skip = token_healing_out.prefix.size();
                 }
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ef2d7fa21..a564e32ab 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2100,8 +2100,8 @@ struct server_context {
 
                     if (slot.sparams.token_healing.enabled) {
                         // For FIM roll back only the prefix part (i.e. cursor location)
-                        token_healing_out = llama_token_healing_rollback(ctx, slot.sparams.token_healing.type,
-                                                                         prefix_tokens, slot.sparams.token_healing.n_rollback);
+                        token_healing_out = llama_token_healing_rollback(ctx, prefix_tokens,
+                                                                         slot.sparams.token_healing.type, slot.sparams.token_healing.n_rollback);
                     }
 
                     auto embd_inp = params.spm_infill ? suffix_tokens : prefix_tokens;
@@ -2121,8 +2121,8 @@ struct server_context {
                     prompt_tokens = tokenize(slot.prompt, system_prompt.empty());  // add BOS if there isn't system prompt
 
                     if (slot.sparams.token_healing.enabled) {
-                        token_healing_out = llama_token_healing_rollback(ctx, slot.sparams.token_healing.type,
-                                                                         prompt_tokens, slot.sparams.token_healing.n_rollback);
+                        token_healing_out = llama_token_healing_rollback(ctx, prompt_tokens,
+                                                                         slot.sparams.token_healing.type, slot.sparams.token_healing.n_rollback);
                     }
                 }
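
Note for reviewers: after this patch, callers pass the token list before the healing type, matching the other helpers that take (ctx, tokens, ...). A minimal sketch of the updated call order follows; it is illustrative only and not part of the patch, and it assumes `ctx`, `params`, `sparams`, and `ctx_sampling` are set up as in examples/main/main.cpp.

    // Tokenize the prompt, then roll it back for token healing (new argument order).
    std::vector<llama_token> embd_inp = ::llama_tokenize(ctx, params.prompt, /*add_special*/ true);

    llama_token_healing_output token_healing_out = llama_token_healing_rollback(
            ctx,                                // const llama_context *
            embd_inp,                           // tokens to heal; trimmed in place
            sparams.token_healing.type,         // healing type now follows the tokens
            sparams.token_healing.n_rollback);  // max tokens to remove (negative = no explicit cap)

    // Per the header comment: set the returned prefix before the first sampling.
    llama_token_healing_set_prefix(ctx_sampling, token_healing_out.prefix);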