llama : sanitize tokens in the upper bound

ggml-ci
commit 297ba5c3af
parent a5b5d9a101
Author: slaren
Date:   2024-09-08 00:33:40 +02:00


@@ -16067,7 +16067,7 @@ static int llama_decode_internal(
     }
 
     for (uint32_t i = 0; i < n_tokens_all; ++i) {
-        if (batch_all.token[i] < 0) {
+        if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= lctx.model.vocab.n_vocab) {
             LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
             return -1;
         }
@@ -16366,7 +16366,7 @@ static int llama_encode_internal(
     }
 
     for (uint32_t i = 0; i < n_tokens; ++i) {
-        if (batch.token[i] < 0) {
+        if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= lctx.model.vocab.n_vocab) {
             LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
             return -1;
         }
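
For illustration, below is a minimal, self-contained sketch of the validation pattern both hunks apply. It assumes llama_token is int32_t, as in llama.cpp's public header; the check_tokens helper, its parameters, and the main driver are hypothetical stand-ins, not part of the actual change. The key point is the order of the tests: the signed < 0 comparison runs first, so the cast to uint32_t in the upper-bound comparison is never applied to a negative value.

#include <cstdint>
#include <cstdio>

typedef int32_t llama_token; // matches llama.h

// Hypothetical helper mirroring the check added in both hunks: a token id is
// valid only if it is non-negative and strictly below the vocabulary size.
static bool check_tokens(const llama_token * tokens, uint32_t n_tokens, uint32_t n_vocab) {
    for (uint32_t i = 0; i < n_tokens; ++i) {
        // The < 0 test runs first, so the uint32_t cast below only ever sees
        // non-negative values and cannot wrap a negative id into a large
        // "valid-looking" index.
        if (tokens[i] < 0 || (uint32_t) tokens[i] >= n_vocab) {
            fprintf(stderr, "%s: invalid token[%u] = %d\n", __func__, i, tokens[i]);
            return false;
        }
    }
    return true;
}

int main() {
    const uint32_t n_vocab = 32000;              // e.g. the LLaMA vocabulary size
    const llama_token ok[]  = { 1, 2, 31999 };   // all within [0, n_vocab)
    const llama_token bad[] = { 1, -5, 40000 };  // negative and out-of-range ids

    printf("ok:  %d\n", check_tokens(ok,  3, n_vocab)); // prints 1
    printf("bad: %d\n", check_tokens(bad, 3, n_vocab)); // prints 0, logs token[1]
}

Before this commit only the lower bound was rejected, so an id past the end of the vocabulary would be used as an out-of-bounds index into model tensors; the added upper-bound test turns that into a clean error return instead.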