llama: apply the mllama support patch

Signed-off-by: YiYing He <yiying@secondstate.io>
This commit is contained in:
YiYing He 2025-01-15 17:07:09 +08:00
parent cde3833239
commit 45a89e0cec
16 changed files with 440 additions and 11 deletions

View file

@@ -632,7 +632,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
if (llama_model_has_encoder(&model)) {
n_attn_layer *= 3;
}
-GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
+if (qs.n_attention_wv != n_attn_layer) {
+LLAMA_LOG_WARN("%s: n_attention_wv is unexpected, expected: %d, found: %d\n", __func__, n_attn_layer, qs.n_attention_wv);
+}
}
size_t total_size_org = 0;