From 57eaa39c163ad5b281b24a8193c1e95fe65ed70a Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sat, 16 Sep 2023 00:05:32 +0800 Subject: [PATCH] refactor: cleanup comments a bit --- convert-starcoder-hf-to-gguf.py | 1 + llama.cpp | 12 ++++-------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/convert-starcoder-hf-to-gguf.py b/convert-starcoder-hf-to-gguf.py index 34de69c6b..1c56dca9f 100755 --- a/convert-starcoder-hf-to-gguf.py +++ b/convert-starcoder-hf-to-gguf.py @@ -209,6 +209,7 @@ for part_name in part_names: data = data.squeeze().numpy() + # TODO: implement MQA directly, instead of duplicating K,V heads into MHA. if name.endswith(".attn.c_attn.weight") or name.endswith(".attn.c_attn.bias"): print("Duplicate K,V heads to use MHA instead of MQA for", name) diff --git a/llama.cpp b/llama.cpp index 21eebfbd7..19b6805b7 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3620,19 +3620,16 @@ static struct ggml_cgraph * llm_build_starcoder( // Projection cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wo, cur), model.layers[il].bo); - // add the input + // Add the input cur = ggml_add(ctx0, cur, inpL); struct ggml_tensor * inpFF = cur; // FF { - // norm + // Norm { cur = ggml_norm(ctx0, inpFF, norm_eps); - - // cur = ln_2_g*cur + ln_2_b - // [ 768, N] cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.layers[il].ffn_norm), model.layers[il].ffn_norm_b); } @@ -3641,14 +3638,14 @@ static struct ggml_cgraph * llm_build_starcoder( // GELU activation cur = ggml_gelu(ctx0, cur); - // projection + // Projection cur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].w2, cur), model.layers[il].b2); } inpL = ggml_add(ctx0, cur, inpFF); } - // norm + // Output Norm { cur = ggml_norm(ctx0, inpL, norm_eps); cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.output_norm), model.output_norm_b); @@ -3661,7 +3658,6 @@ static struct ggml_cgraph * llm_build_starcoder( ggml_build_forward_expand(gf, cur); ggml_free(ctx0); - // norm return gf; }