From 1be89c4002a247449d220824e76c6d26fd548eae Mon Sep 17 00:00:00 2001
From: Jan Ploski
Date: Sun, 1 Oct 2023 01:14:07 +0200
Subject: [PATCH] mpt : addendum to changeset:84e30e8 - leave parameter
 clamp_kqv out from metadata rather than use 0.0 to indicate "no clamping"
 (more compliant with the current GGUF spec?)

---
 convert-mpt-hf-to-gguf.py | 3 ++-
 llama.cpp                 | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/convert-mpt-hf-to-gguf.py b/convert-mpt-hf-to-gguf.py
index e0a8b166d..057cb34f6 100755
--- a/convert-mpt-hf-to-gguf.py
+++ b/convert-mpt-hf-to-gguf.py
@@ -122,7 +122,8 @@ gguf_writer.add_block_count(block_count)
 gguf_writer.add_feed_forward_length(4 * hparams["d_model"])
 gguf_writer.add_head_count(hparams["n_heads"])
 gguf_writer.add_layer_norm_eps(1e-05)
-gguf_writer.add_clamp_kqv(hparams["attn_config"]["clip_qkv"] or 0.0)
+if hparams["attn_config"]["clip_qkv"] is not None:
+    gguf_writer.add_clamp_kqv(hparams["attn_config"]["clip_qkv"])
 gguf_writer.add_max_alibi_bias(hparams["attn_config"]["alibi_bias_max"])
 
 # TOKENIZATION

diff --git a/llama.cpp b/llama.cpp
index a2bdd9d3d..2bec27b8b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1926,7 +1926,11 @@ static void llm_load_hparams(
         case LLM_ARCH_MPT:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
-                GGUF_GET_KEY(ctx, hparams.f_clamp_kqv, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_CLAMP_KQV));
+                if (gguf_find_key(ctx, kv(LLM_KV_ATTENTION_CLAMP_KQV).c_str()) >= 0) {
+                    GGUF_GET_KEY(ctx, hparams.f_clamp_kqv, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_CLAMP_KQV));
+                } else {
+                    hparams.f_clamp_kqv = 0.0f;
+                }
                 GGUF_GET_KEY(ctx, hparams.f_max_alibi_bias, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS));
 
                 switch (hparams.n_layer) {
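
To check that a re-converted model really leaves the key out when clip_qkv
is null in config.json, the metadata of the converter output can be
inspected. A minimal sketch, assuming the gguf-py GGUFReader API (available
in newer versions of the gguf package, not required by this patch) and a
hypothetical output path mpt-7b.gguf:

    # assumes a gguf package version that ships GGUFReader
    from gguf import GGUFReader

    reader = GGUFReader("mpt-7b.gguf")  # hypothetical converter output

    # Per the GGUF spec the key name is "<arch>.attention.clamp_kqv",
    # so "mpt.attention.clamp_kqv" for this architecture.
    if "mpt.attention.clamp_kqv" in reader.fields:
        print("clamp_kqv present in metadata")
    else:
        print("clamp_kqv absent -> llama.cpp defaults f_clamp_kqv to 0.0f")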