feat: use the overridden declaration of llama_token_to_piece from common/common.cpp to specify "false" so that control tokens are not shown in chat completion responses
This commit is contained in:
parent
206c974eb6
commit
6c081e501c
1 changed file with 1 addition and 1 deletion
@@ -1118,7 +1118,7 @@ struct server_context {

     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
         slot.sampled = result.tok;

         // search stop word and delete it
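For reference, the overridden helper named in the commit message is the wrapper in common/common.cpp that forwards to the llama.cpp C API and exposes an extra `special` flag. The sketch below is an assumed approximation of that declaration and of how passing `false` suppresses control tokens; it is not code taken from this commit.

// Assumed shape of the wrapper declared in common/common.h and defined in
// common/common.cpp: it forwards to the C API and lets the caller choose
// whether special/control tokens are rendered as text.
std::string llama_token_to_piece(
        const struct llama_context * ctx,
                    llama_token      token,
                    bool             special = true);  // assumed default: render control tokens

// In process_token, passing `special = false` means the piece for a control
// token (e.g. an end-of-turn marker) comes back empty, so it is not appended
// to the text returned in chat completion responses:
//
//     const std::string token_str = llama_token_to_piece(ctx, result.tok, false);

Because the two-argument call previously fell through to the wrapper's default, control tokens were rendered into the response text; the one-line change simply makes the suppression explicit at this call site.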