llama : minor / style

This commit is contained in:
parent 1bf93ced81
commit 32661ac8b4

1 changed file with 13 additions and 16 deletions

llama.cpp  25
llama.cpp

@@ -9028,27 +9028,25 @@ struct llm_build_context {
         return gf;
     }
 
-    struct ggml_cgraph* build_phi3() {
-        struct ggml_cgraph* gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
+    struct ggml_cgraph * build_phi3() {
+        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
 
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 
-        struct ggml_tensor* cur;
-        struct ggml_tensor* inpL;
+        struct ggml_tensor * cur;
+        struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
 
         // inp_pos - contains the positions
-        struct ggml_tensor* inp_pos = build_inp_pos();
+        struct ggml_tensor * inp_pos = build_inp_pos();
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
-        struct ggml_tensor* KQ_mask = build_inp_KQ_mask();
+        struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
 
         for (int il = 0; il < n_layer; ++il) {
             auto residual = inpL;
 
             // self-attention
@@ -9059,9 +9057,9 @@ struct llm_build_context {
                 LLM_NORM_RMS, cb, il);
             cb(attn_norm_output, "attn_norm", il);
 
-            struct ggml_tensor* Qcur = nullptr;
-            struct ggml_tensor* Kcur = nullptr;
-            struct ggml_tensor* Vcur = nullptr;
+            struct ggml_tensor * Qcur = nullptr;
+            struct ggml_tensor * Kcur = nullptr;
+            struct ggml_tensor * Vcur = nullptr;
 
             if (model.layers[il].wqkv) {
                 cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, attn_norm_output);
@@ -9120,6 +9118,8 @@ struct llm_build_context {
             cb(cur, "ffn_norm", il);
 
             // FF
+            // special-case: the up and gate tensors are merged into a single tensor
+            // TOOD: support into llm_build_ffn
             {
                 struct ggml_tensor* up = ggml_mul_mat(ctx0, model.layers[il].ffn_up, cur);
                 cb(up, "ffn_up", il);
@@ -9152,9 +9152,6 @@ struct llm_build_context {
         cur = ggml_mul_mat(ctx0, model.output, cur);
         cb(cur, "result_output", -1);
 
-        //cur = ggml_add(ctx0, cur, NULL);
-        //cb(cur, "result_output", -1);
-
         ggml_build_forward_expand(gf, cur);
 
         return gf;
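Note on the comment added in the third hunk: Phi-3 checkpoints ship the FFN up and gate projections merged into a single ffn_up tensor, so one matmul produces both halves and the result has to be split before the gated activation. The commit only adds the comment; as an illustration of what such a split can look like with the public ggml API, here is a minimal sketch that reuses names from the hunk (ctx0, up, cur, model.layers[il].ffn_down). The view offsets, which half is treated as the gate, and the choice of SiLU are assumptions made for the example, not code taken from llama.cpp.

    // sketch only: split the merged [2*n_ff, n_tokens] matmul result into two views
    const int64_t n_ff_half = up->ne[0] / 2;
    const size_t  row_bytes = ggml_row_size(up->type, up->ne[0]);

    // first half -> gate (assumption: the gate projection is stored first)
    struct ggml_tensor * gate = ggml_view_2d(ctx0, up, n_ff_half, up->ne[1], row_bytes, 0);
    // second half -> up, offset by half a row in bytes
    struct ggml_tensor * upv  = ggml_view_2d(ctx0, up, n_ff_half, up->ne[1], row_bytes,
            n_ff_half * ggml_element_size(up));

    // gated activation: silu(gate) * up, then the down projection
    struct ggml_tensor * act = ggml_mul(ctx0,
            ggml_silu(ctx0, ggml_cont(ctx0, gate)),
            ggml_cont(ctx0, upv));
    cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down, act);

The "TOOD: support into llm_build_ffn" comment in the hunk suggests that this special case is meant to move behind the common FFN helper eventually, rather than staying inline in build_phi3().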