llama : switch the loop order in build_defrag

2024-02-25 17:51:02 +02:00 · 2024-02-25 17:51:02 +02:00 · 0b72ded501
commit 0b72ded501
parent 4eaaace394
1 changed file with 12 additions and 12 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -5114,7 +5114,6 @@ struct llm_build_context {
    struct ggml_cgraph * build_defrag(const std::vector<uint32_t> & ids) {
        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);

-        for (int il = 0; il < n_layer; ++il) {
        for (int i = 0; i < n_kv; ++i) {
            const int id = ids[i];

@@ -5128,6 +5127,7 @@ struct llm_build_context {
                nm++;
            }

+            for (int il = 0; il < n_layer; ++il) {
                ggml_tensor * view_k_src = ggml_view_2d(ctx0, kv_self.k_l[il],
                        n_embd_k_gqa, nm,
                        ggml_row_size(kv_self.k_l[il]->type, n_embd_k_gqa),
@@ -5150,10 +5150,10 @@ struct llm_build_context {

                ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_k_src, view_k_dst));
                ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_v_src, view_v_dst));
+            }

            i += nm - 1;
        }
-        }

        return gf;
    }