From c197f0fcbda6234527382a01db71dc11c7a6e9c0 Mon Sep 17 00:00:00 2001
From: Matias Lin <matiasenoclin@gmail.com>
Date: Fri, 27 Sep 2024 10:03:33 -0700
Subject: [PATCH] common: ensure token addition to batch does not exceed
 llama_batch size

A crash was observed when the number of tokens added to a batch exceeded
the llama_batch size. An assertion was added to llama_batch_add to
protect against overflowing the llama_batch.
---
 common/common.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/common/common.cpp b/common/common.cpp
index 8d0ed4f95..614771500 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1432,6 +1432,8 @@ void llama_batch_add(
                           llama_pos   pos,
     const std::vector<llama_seq_id> & seq_ids,
                                bool   logits) {
+    GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
+
     batch.token   [batch.n_tokens] = id;
     batch.pos     [batch.n_tokens] = pos;
     batch.n_seq_id[batch.n_tokens] = seq_ids.size();