llama : add early return in Unigram tokenizer when normalized input is empty

2024-07-02 11:04:04 +02:00 · 2024-07-02 11:04:04 +02:00 · 78675f35ee
commit 78675f35ee
parent 6dc9eb4040
1 changed file with 3 additions and 0 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -14888,6 +14888,9 @@ struct llm_tokenizer_ugm {
        std::string normalized;
        normalize(text, &normalized);
        size_t input_len = normalized.size();
+        if (input_len == 0) {
+            return;
+        }

        // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {0, 0, -FLT_MAX});