llama: apply the mllama support patch

Signed-off-by: YiYing He <yiying@secondstate.io>
2025-01-15 17:07:09 +08:00 · 2025-01-15 17:07:09 +08:00 · 45a89e0cec
commit 45a89e0cec
parent cde3833239
16 changed files with 440 additions and 11 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -632,7 +632,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
        if (llama_model_has_encoder(&model)) {
            n_attn_layer *= 3;
        }
-        GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
+        if (qs.n_attention_wv != n_attn_layer) {
+            LLAMA_LOG_WARN("%s: n_attention_wv is unexpected, expected: %d, found: %d\n", __func__, n_attn_layer, qs.n_attention_wv);
+        }
    }

    size_t total_size_org = 0;