feat: use the overloaded declaration of llama_token_to_piece from common/common.cpp to specify "false" so that control tokens are not shown in chat completion responses

2024-04-23 22:51:01 -05:00 · 2024-04-23 22:51:01 -05:00 · 6c081e501c
commit 6c081e501c
parent 206c974eb6
1 changed files with 1 additions and 1 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1118,7 +1118,7 @@ struct server_context {
    bool process_token(completion_token_output & result, server_slot & slot) {
        // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
        slot.sampled = result.tok;
        // search stop word and delete it