llama: apply the mllama support patch

Signed-off-by: YiYing He <yiying@secondstate.io>
2025-01-15 17:07:09 +08:00 · 2025-01-15 17:07:09 +08:00 · 45a89e0cec
commit 45a89e0cec
parent cde3833239
16 changed files with 440 additions and 11 deletions
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -9,6 +9,7 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include <stdexcept>

 struct llama_model_loader;

@@ -62,6 +63,7 @@ enum llm_type {
    LLM_TYPE_40B,
    LLM_TYPE_65B,
    LLM_TYPE_70B,
+    LLM_TYPE_90B,
    LLM_TYPE_236B,
    LLM_TYPE_314B,
    LLM_TYPE_671B,
@@ -281,6 +283,16 @@ struct llama_layer {
    struct ggml_tensor * ffn_up_scale   = nullptr;
    struct ggml_tensor * ffn_down_scale = nullptr;

+    // cross attention
+    struct ggml_tensor * cross_attn_k_norm = nullptr;
+    struct ggml_tensor * cross_attn_k_proj = nullptr;
+    struct ggml_tensor * cross_attn_o_proj = nullptr;
+    struct ggml_tensor * cross_attn_q_norm = nullptr;
+    struct ggml_tensor * cross_attn_q_proj = nullptr;
+    struct ggml_tensor * cross_attn_v_proj = nullptr;
+    struct ggml_tensor * cross_attn_attn_gate = nullptr;
+    struct ggml_tensor * cross_attn_mlp_gate = nullptr;
+
    struct llama_layer_posnet posnet;

    struct llama_layer_convnext convnext;