From 6c081e501caa281f9de6780e025896c4508b5739 Mon Sep 17 00:00:00 2001
From: Kyle Mistele
Date: Tue, 23 Apr 2024 22:51:01 -0500
Subject: [PATCH] feat: use the overridden declaration of llama_token_to_piece
 from common/common.cpp to specify "false" so that control tokens are not
 shown in chat completion responses

---
 examples/server/server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 25bc29639..ced8f584f 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1118,7 +1118,7 @@ struct server_context {
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
         slot.sampled = result.tok;

         // search stop word and delete it
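
The one-line change above switches to the wrapper declared in common/common.h,
which (in the llama.cpp tree of this era) is assumed to take a third `special`
parameter controlling whether control tokens such as <|eot_id|> are rendered
into the returned piece. Below is a minimal sketch of the assumed signature
and the behavioral difference; the exact declaration and its default value
should be checked against your checkout.

    // Assumed wrapper in common/common.h (not part of this patch):
    //   std::string llama_token_to_piece(const struct llama_context * ctx,
    //                                    llama_token token,
    //                                    bool special = true);
    //
    // Requires a loaded llama.cpp model/context; `tok` stands in for a
    // control token id such as the model's EOS token.
    #include "common.h"
    #include <iostream>

    static void show_piece(llama_context * ctx, llama_token tok) {
        // With special = true, the control token's text is rendered into
        // the returned string (e.g. "<|eot_id|>") ...
        std::cout << "special=true:  " << llama_token_to_piece(ctx, tok, true)  << "\n";
        // ... with special = false it renders as an empty piece, so it never
        // leaks into the accumulated chat completion response.
        std::cout << "special=false: " << llama_token_to_piece(ctx, tok, false) << "\n";
    }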