feat: use the overridden declaration of llama_token_to_piece from common/common.cpp to specify "false" so that control tokens are not shown in chat completion responses"

This commit is contained in:
Kyle Mistele 2024-04-23 22:51:01 -05:00
parent 206c974eb6
commit 6c081e501c

View file

@ -1118,7 +1118,7 @@ struct server_context {
bool process_token(completion_token_output & result, server_slot & slot) { bool process_token(completion_token_output & result, server_slot & slot) {
// remember which tokens were sampled - used for repetition penalties during sampling // remember which tokens were sampled - used for repetition penalties during sampling
const std::string token_str = llama_token_to_piece(ctx, result.tok); const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
slot.sampled = result.tok; slot.sampled = result.tok;
// search stop word and delete it // search stop word and delete it