From 297ba5c3afbda5f2b822d88ad8bdcba93e2f5e60 Mon Sep 17 00:00:00 2001
From: slaren
Date: Sun, 8 Sep 2024 00:33:40 +0200
Subject: [PATCH] llama : sanitize tokens in the upper bound

ggml-ci
---
 src/llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 190564fa4..fc30fda69 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16067,7 +16067,7 @@ static int llama_decode_internal(
     }
 
     for (uint32_t i = 0; i < n_tokens_all; ++i) {
-        if (batch_all.token[i] < 0) {
+        if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= lctx.model.vocab.n_vocab) {
             LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch_all.token[i]);
             return -1;
         }
@@ -16366,7 +16366,7 @@ static int llama_encode_internal(
     }
 
     for (uint32_t i = 0; i < n_tokens; ++i) {
-        if (batch.token[i] < 0) {
+        if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= lctx.model.vocab.n_vocab) {
             LLAMA_LOG_ERROR("%s: invalid token[%d] = %d", __func__, i, batch.token[i]);
             return -1;
         }
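
Note (not part of the patch): both hunks apply the same sanitation rule, namely that a token id is valid iff 0 <= id < n_vocab; the existing check only rejected negative ids, so an id at or above the vocabulary size could still index out of bounds into the embedding table. Below is a minimal standalone sketch of that rule; the validate_tokens helper and the 32000-entry vocabulary size are illustrative assumptions, not llama.cpp API.

// Standalone sketch of the bounds check added above. validate_tokens is a
// hypothetical helper, not a llama.cpp function; it mirrors the patched
// condition: reject token ids outside [0, n_vocab).
#include <cstdint>
#include <cstdio>

static bool validate_tokens(const int32_t * tokens, uint32_t n_tokens, uint32_t n_vocab) {
    for (uint32_t i = 0; i < n_tokens; ++i) {
        if (tokens[i] < 0 || (uint32_t) tokens[i] >= n_vocab) {
            fprintf(stderr, "invalid token[%u] = %d\n", i, tokens[i]);
            return false;
        }
    }
    return true;
}

int main() {
    // assumed 32000-entry vocabulary (typical of LLaMA-family models)
    const int32_t tokens[] = { 1, 42, -5, 100000 };
    // -5 fails the existing lower-bound check; 100000 fails the new upper bound
    return validate_tokens(tokens, 4, 32000) ? 0 : 1;
}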