common : revert showing control tokens by default for server (#6860)
* fix: revert showing control tokens by default * feat: revert changes to default behavior of llama_token_to_piece; provide overridden declaration to receive "bool special" param to toggle showing control tokens * feat: use the overridden declaration of llama_token_to_piece from common/common.cpp to specify "false" so that control tokens are not shown in chat completion responses" * common : simplify --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
28103f4832
commit
37246b1031
3 changed files with 7 additions and 6 deletions
|
@ -2328,12 +2328,12 @@ std::vector<llama_token> llama_tokenize(
|
|||
return result;
|
||||
}
|
||||
|
||||
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
|
||||
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
|
||||
std::vector<char> result(8, 0);
|
||||
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
|
||||
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
|
||||
if (n_tokens < 0) {
|
||||
result.resize(-n_tokens);
|
||||
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
|
||||
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
|
||||
GGML_ASSERT(check == -n_tokens);
|
||||
} else {
|
||||
result.resize(n_tokens);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue