Do not remove space when decoding special tokens

This commit is contained in:
jaime-m-p 2024-06-24 20:37:48 +02:00
parent 44c8648461
commit 9eb0fca027

View file

@@ -18506,7 +18506,7 @@ int32_t llama_detokenize(
bool special) { bool special) {
// remove the leading space of the first non-control token // remove the leading space of the first non-control token
static const int attr_special = LLAMA_TOKEN_ATTR_UNKNOWN | LLAMA_TOKEN_ATTR_CONTROL; static const int attr_special = LLAMA_TOKEN_ATTR_UNKNOWN | LLAMA_TOKEN_ATTR_CONTROL;
bool remove_space = model->vocab.tokenizer_add_space_prefix; bool remove_space = !special && model->vocab.tokenizer_add_space_prefix;
int32_t avail = text_len_max; int32_t avail = text_len_max;
int32_t total = 0; int32_t total = 0;