llama : add early return in Unigram tokenizer when normalized input is empty

This commit is contained in:
Stanisław Szymczyk 2024-07-02 11:04:04 +02:00
parent 6dc9eb4040
commit 78675f35ee

View file

@@ -14888,6 +14888,9 @@ struct llm_tokenizer_ugm {
std::string normalized;
normalize(text, &normalized);
size_t input_len = normalized.size();
if (input_len == 0) {
return;
}
// initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
std::vector<struct best_tokenization> tokenization_results(input_len + 1, {0, 0, -FLT_MAX});