llama : add remove_space_prefix to llama_detokenize
This commit adds a new parameter to llama_detokenize to remove the leading space before tokens if they have a word boundary character. The motivation for this change is that when llama_server returns completion_propabilities, the tokens are detokenized and currently the leading space for the boundary tokens are removed. With this change llama_server can set remove_space_prefix to false and the leading space will be preserved. Resolves: https://github.com/ggerganov/llama.cpp/issues/11728
This commit is contained in:
parent
d7b31a9d84
commit
cc1fd2fd0d
7 changed files with 35 additions and 24 deletions
|
@ -1746,19 +1746,19 @@ std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token
|
|||
return piece;
|
||||
}
|
||||
|
||||
std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
|
||||
std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix) {
|
||||
const llama_model * model = llama_get_model(ctx);
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
return common_detokenize(vocab, tokens, special);
|
||||
return common_detokenize(vocab, tokens, special, remove_space_prefix);
|
||||
}
|
||||
|
||||
std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special) {
|
||||
std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix) {
|
||||
std::string text;
|
||||
text.resize(std::max(text.capacity(), tokens.size()));
|
||||
int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
|
||||
int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special, remove_space_prefix);
|
||||
if (n_chars < 0) {
|
||||
text.resize(-n_chars);
|
||||
n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
|
||||
n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special, remove_space_prefix);
|
||||
GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue