From 09ff755ecceed3e98d07cb6e8d4555663a7c4690 Mon Sep 17 00:00:00 2001
From: Michael Coppola
Date: Mon, 23 Oct 2023 17:30:28 -0400
Subject: [PATCH] Added special token support to llama_tokenize() calls in
 server.cpp

---
 examples/server/server.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 693f9b773..5edbfccca 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -620,12 +620,12 @@ struct llama_server_context
             std::vector<llama_token> p;
             if (first)
             {
-                p = ::llama_tokenize(ctx, s, add_bos);
+                p = ::llama_tokenize(ctx, s, add_bos, true);
                 first = false;
             }
             else
             {
-                p = ::llama_tokenize(ctx, s, false);
+                p = ::llama_tokenize(ctx, s, false, true);
             }
             prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
         }
@@ -642,7 +642,7 @@ struct llama_server_context
         else
         {
             auto s = json_prompt.template get<std::string>();
-            prompt_tokens = ::llama_tokenize(ctx, s, add_bos);
+            prompt_tokens = ::llama_tokenize(ctx, s, add_bos, true);
         }

         return prompt_tokens;
@@ -861,7 +861,7 @@ struct llama_server_context
     }

     void update_system_prompt() {
-        system_tokens = ::llama_tokenize(ctx, system_prompt, true);
+        system_tokens = ::llama_tokenize(ctx, system_prompt, true, true);

         llama_batch_clear(batch);
@@ -1180,7 +1180,7 @@ struct llama_server_context
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs_output = {};
-            const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
+            const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false, true);
             size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
             if (probs_pos < probs_stop_pos)
@@ -1226,7 +1226,7 @@ struct llama_server_context
             std::vector<completion_token_output> probs = {};
             if (!slot.params.stream && slot.stopped_word)
             {
-                const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
+                const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false, true);
                 probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
             }
             else