From 57eaa39c163ad5b281b24a8193c1e95fe65ed70a Mon Sep 17 00:00:00 2001
From: Meng Zhang <meng@tabbyml.com>
Date: Sat, 16 Sep 2023 00:05:32 +0800
Subject: [PATCH] refactor: clean up comments a bit

---
 convert-starcoder-hf-to-gguf.py |  1 +
 llama.cpp                       | 12 ++++--------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/convert-starcoder-hf-to-gguf.py b/convert-starcoder-hf-to-gguf.py
index 34de69c6b..1c56dca9f 100755
--- a/convert-starcoder-hf-to-gguf.py
+++ b/convert-starcoder-hf-to-gguf.py
@@ -209,6 +209,7 @@ for part_name in part_names:
 
         data = data.squeeze().numpy()
 
+        # TODO: implement MQA directly instead of duplicating the K,V heads into MHA.
         if name.endswith(".attn.c_attn.weight") or name.endswith(".attn.c_attn.bias"):
             print("Duplicate K,V heads to use MHA instead of MQA for", name)
 
diff --git a/llama.cpp b/llama.cpp
index 21eebfbd7..19b6805b7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3620,19 +3620,16 @@ static struct ggml_cgraph * llm_build_starcoder(
         // Projection
         cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wo, cur), model.layers[il].bo);
 
-        // add the input
+        // Add the input
         cur = ggml_add(ctx0, cur, inpL);
 
         struct ggml_tensor * inpFF = cur;
 
         // FF
         {
-            // norm
+            // Norm
             {
                 cur = ggml_norm(ctx0, inpFF, norm_eps);
-
-                // cur = ln_2_g*cur + ln_2_b
-                // [ 768, N]
                 cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.layers[il].ffn_norm), model.layers[il].ffn_norm_b);
             }
 
@@ -3641,14 +3638,14 @@ static struct ggml_cgraph * llm_build_starcoder(
             // GELU activation
             cur = ggml_gelu(ctx0, cur);
 
-            // projection
+            // Projection
             cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].w2, cur), model.layers[il].b2);
         }
 
         inpL = ggml_add(ctx0, cur, inpFF);
     }
 
-	// norm
+	// Output Norm
 	{
 		cur = ggml_norm(ctx0, inpL, norm_eps);
 		cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.output_norm), model.output_norm_b);
@@ -3661,7 +3658,6 @@ static struct ggml_cgraph * llm_build_starcoder(
     ggml_build_forward_expand(gf, cur);
     ggml_free(ctx0);
 
-    // norm
     return gf;
 }