llama : fix not enough space in buffer with Qwen

2024-01-22 21:05:25 +01:00 · 2024-01-22 21:05:25 +01:00 · f0bb1052c6
commit f0bb1052c6
parent 6f9939d119
1 changed file with 1 addition and 1 deletion
--- a/llama.cpp
+++ b/llama.cpp
@@ -4440,9 +4440,9 @@ static struct ggml_tensor * llm_build_kv(

    // these nodes are added to the graph together so that they are not reordered
    // by doing so, the number of splits in the graph is reduced
+    ggml_build_forward_expand(graph, q_cur);
    ggml_build_forward_expand(graph, k_cur);
    ggml_build_forward_expand(graph, v_cur);
-    ggml_build_forward_expand(graph, q_cur);

    llm_build_kv_store(ctx, hparams, kv, graph, k_cur, v_cur, n_ctx, n_tokens, kv_head, cb, il);