From bd76198618814ce0d34be2859a0908b63f98f0f5 Mon Sep 17 00:00:00 2001
From: Yoshi Suhara <ysuhara@nvidia.com>
Date: Sat, 10 Aug 2024 22:28:16 -0700
Subject: [PATCH] Remove mutable n_tokens variable from build_nemotron

---
 src/llama.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 06e0d6473..e3b07b80e 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -13823,9 +13823,6 @@ struct llm_build_context {
     struct ggml_cgraph * build_nemotron() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
 
-        // mutable variable, needed during the last layer of the computation to skip unused tokens
-        int32_t n_tokens = this->n_tokens;
-
         const int64_t n_embd_head = hparams.n_embd_head_v;
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
         //GGML_ASSERT(n_embd_head == hparams.n_rot);