fix: don't add space after special tokens when using SPM

This commit is contained in:
Gilad S 2024-06-02 19:58:43 +03:00 committed by GitHub
parent 9422c5e34b
commit fb1fef9962
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -13393,7 +13393,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
 }
 if (vocab.add_space_prefix) {
-    if (!output.size() || is_prev_special) { // prefix with space if first token
+    if (!output.size() && !is_prev_special) { // prefix with space if first token
         raw_text = " " + raw_text;
     }
 }