mpt : addendum to changeset:84e30e8 - leave the clamp_kqv parameter out of the metadata rather than using 0.0 to indicate "no clamping" (more compliant with the current GGUF spec?)
parent 00e8c5c5f6
commit 1be89c4002
2 changed files with 7 additions and 2 deletions
@@ -122,7 +122,8 @@ gguf_writer.add_block_count(block_count)
 gguf_writer.add_feed_forward_length(4 * hparams["d_model"])
 gguf_writer.add_head_count(hparams["n_heads"])
 gguf_writer.add_layer_norm_eps(1e-05)
-gguf_writer.add_clamp_kqv(hparams["attn_config"]["clip_qkv"] or 0.0)
+if hparams["attn_config"]["clip_qkv"] is not None:
+    gguf_writer.add_clamp_kqv(hparams["attn_config"]["clip_qkv"])
 gguf_writer.add_max_alibi_bias(hparams["attn_config"]["alibi_bias_max"])

 # TOKENIZATION
@@ -1926,7 +1926,11 @@ static void llm_load_hparams(
         case LLM_ARCH_MPT:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
-                GGUF_GET_KEY(ctx, hparams.f_clamp_kqv, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_CLAMP_KQV));
+                if (gguf_find_key(ctx, kv(LLM_KV_ATTENTION_CLAMP_KQV).c_str()) >= 0) {
+                    GGUF_GET_KEY(ctx, hparams.f_clamp_kqv, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_CLAMP_KQV));
+                } else {
+                    hparams.f_clamp_kqv = 0.0f;
+                }
                 GGUF_GET_KEY(ctx, hparams.f_max_alibi_bias, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS));

                 switch (hparams.n_layer) {
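In effect, the absence of the key now encodes "no clamping", and the loader supplies the 0.0f sentinel itself. Below is a minimal sketch of the resulting round trip, with a plain Python dict standing in for the GGUF KV store and "mpt.attention.clamp_kqv" assumed as the serialized key name; this is illustration only, not the gguf-py API:

metadata = {}

def write_clamp_kqv(clip_qkv):
    # Writer side (first hunk): emit the key only when the model actually clamps.
    if clip_qkv is not None:
        metadata["mpt.attention.clamp_kqv"] = clip_qkv

def read_clamp_kqv():
    # Reader side (second hunk): a missing key falls back to 0.0,
    # mirroring hparams.f_clamp_kqv = 0.0f in llm_load_hparams.
    return metadata.get("mpt.attention.clamp_kqv", 0.0)

write_clamp_kqv(None)   # no clamping: the key is left out of the metadata
assert read_clamp_kqv() == 0.0

write_clamp_kqv(8.0)    # checkpoint that clamps QKV activations to [-8, 8]
assert read_clamp_kqv() == 8.0

Compared with the old writer expression clip_qkv or 0.0, this also stops conflating a missing clip_qkv with an explicit 0.0, and, per the commit message, is arguably closer to how the current GGUF spec treats optional hyperparameters.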