llama : add remove_space_prefix to llama_detokenize
This commit adds a new parameter, remove_space_prefix, to llama_detokenize that controls whether the leading space is removed from tokens that start with a word boundary character.

The motivation for this change is that when llama_server returns completion_probabilities, the tokens are detokenized individually, and the leading space of word-boundary tokens is currently stripped. With this change, llama_server can set remove_space_prefix to false so that the leading space is preserved.

Resolves: https://github.com/ggerganov/llama.cpp/issues/11728
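As a quick illustration of the caller's side (not part of the diff; ctx and tok are assumed to be a valid llama_context pointer and token id):

    // Detokenize a single token. Passing remove_space_prefix = false keeps
    // the leading space that a word-boundary token would otherwise lose.
    std::string piece = common_detokenize(ctx, {tok}, /* special */ false, /* remove_space_prefix */ false);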
Parent: d7b31a9d84
Commit: cc1fd2fd0d
7 changed files with 35 additions and 24 deletions
@@ -1746,19 +1746,19 @@ std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token
     return piece;
 }
 
-std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
+std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix) {
     const llama_model * model = llama_get_model(ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);
-    return common_detokenize(vocab, tokens, special);
+    return common_detokenize(vocab, tokens, special, remove_space_prefix);
 }
 
-std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special) {
+std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix) {
     std::string text;
     text.resize(std::max(text.capacity(), tokens.size()));
-    int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+    int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special, remove_space_prefix);
     if (n_chars < 0) {
         text.resize(-n_chars);
-        n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+        n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special, remove_space_prefix);
         GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
     }
@@ -601,12 +601,14 @@ std::string common_token_to_piece(
 std::string common_detokenize(
         const struct llama_context * ctx,
         const std::vector<llama_token> & tokens,
-        bool special = true);
+        bool special = true,
+        bool remove_space_prefix = true);
 
 std::string common_detokenize(
         const struct llama_vocab * vocab,
         const std::vector<llama_token> & tokens,
-        bool special = true);
+        bool special = true,
+        bool remove_space_prefix = true);
 
 //
 // Chat template utils
@@ -176,12 +176,12 @@ static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab)
 
         llama_token token = i;
         auto dp = (char *) token_bytes + offset;
-        auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false);
+        auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false, true);
         if (size < 0) {
             GGML_ABORT("llama_detokenize failed\n");
         }
         if (size == 0) {
-            size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true);
+            size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true, true);
             if (size < 0) {
                 GGML_ABORT("llama_detokenize failed\n");
             }
@@ -2297,7 +2297,7 @@ struct server_context {
             for (size_t i = 0; i < std::min(n_vocab, n_probs); i++) {
                 result.probs.push_back({
                     cur[i].id,
-                    common_detokenize(ctx, {cur[i].id}, special),
+                    common_detokenize(ctx, {cur[i].id}, special, /* remove_space_prefix */ false),
                     cur[i].p
                 });
             }
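The reasoning behind passing false here: each probability entry is detokenized on its own, so a word-boundary token must keep its leading space for the per-token pieces to concatenate back to the completion text. Illustrative sketch (the token is made up):

    // token "▁world" detokenized in isolation:
    //   remove_space_prefix = true   -> "world"   (leading space lost)
    //   remove_space_prefix = false  -> " world"  (pieces concatenate cleanly)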
@@ -1025,6 +1025,7 @@ extern "C" {
     /// @return Returns a negative number on failure - the number of chars/bytes that would have been returned.
     /// @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so.
     /// @param unparse_special If true, special tokens are rendered in the output.
+    /// @param remove_space_prefix If true, removes the leading space before tokens if they have a word boundary character.
     LLAMA_API int32_t llama_detokenize(
         const struct llama_vocab * vocab,
         const llama_token * tokens,
@@ -1032,7 +1033,8 @@
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special);
+        bool unparse_special,
+        bool remove_space_prefix);
 
 //
 // Chat templates
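For reference, a sketch of a call through the updated C API (buffer size and variable names are illustrative, not taken from the commit):

    char buf[256];
    int32_t n = llama_detokenize(vocab, tokens, n_tokens, buf, sizeof(buf),
                                 /* remove_special      */ false,
                                 /* unparse_special     */ true,
                                 /* remove_space_prefix */ false);
    if (n < 0) {
        // per the @return note above: the buffer was too small,
        // and -n is the number of chars that would have been written
    }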
@@ -1322,11 +1322,13 @@ struct llama_vocab::impl {
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const;
+        bool unparse_special,
+        bool remove_space_prefix = true) const;
 
     std::string detokenize(
             const std::vector<llama_token> & tokens,
-            bool special) const;
+            bool special,
+            bool remove_space_prefix = true) const;
 
     void print_info() const;
@@ -2581,7 +2583,8 @@ int32_t llama_vocab::impl::detokenize(
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const {
+        bool unparse_special,
+        bool remove_space_prefix) const {
     if (type == LLAMA_VOCAB_TYPE_NONE) {
         return 0;
     }
@@ -2592,7 +2595,7 @@ int32_t llama_vocab::impl::detokenize(
     int32_t total = 0;
 
     // remove the leading space
-    bool remove_space = add_space_prefix;
+    bool remove_space = add_space_prefix && remove_space_prefix;
 
     if (remove_special && add_bos) {
         if (n_tokens > 0 && tokens[0] == special_bos_id) {
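This is the behavioral core of the change: the leading space is now trimmed only when the vocab adds a space prefix and the caller also asks for trimming. In sketch form:

    // add_space_prefix | remove_space_prefix -> remove_space
    //       true       |        true         ->   true   (previous behavior)
    //       true       |        false        ->   false  (new: space preserved)
    //       false      |       either        ->   false  (nothing to trim)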
@@ -2991,17 +2994,18 @@ int32_t llama_vocab::detokenize(
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const {
-    return pimpl->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
+        bool unparse_special,
+        bool remove_space_prefix) const {
+    return pimpl->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special, remove_space_prefix);
 }
 
-std::string llama_vocab::detokenize(const std::vector<llama_token> & tokens, bool special) const {
+std::string llama_vocab::detokenize(const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix) const {
     std::string text;
     text.resize(std::max(text.capacity(), tokens.size()));
-    int32_t n_chars = detokenize(tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+    int32_t n_chars = detokenize(tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special, remove_space_prefix);
     if (n_chars < 0) {
         text.resize(-n_chars);
-        n_chars = detokenize(tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+        n_chars = detokenize(tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special, remove_space_prefix);
         GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
     }
@@ -3246,7 +3250,8 @@ int32_t llama_detokenize(
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) {
-    return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
+        bool unparse_special,
+        bool remove_space_prefix) {
+    return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special, remove_space_prefix);
 }
 
@@ -111,11 +111,13 @@ struct llama_vocab {
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const;
+        bool unparse_special,
+        bool remove_space_prefix = true) const;
 
     std::string detokenize(
             const std::vector<llama_token> & tokens,
-            bool special) const;
+            bool special,
+            bool remove_space_prefix = true) const;
 
     void print_info() const;