llama : reuse hparams.f_max_alibi_bias in all cases
ggml-ci
This commit is contained in:
parent
7e0c3778fb
commit
6ca762eccf
1 changed files with 17 additions and 11 deletions
28
llama.cpp
28
llama.cpp
|
@ -1557,10 +1557,10 @@ struct llama_hparams {
|
|||
uint32_t n_yarn_orig_ctx;
|
||||
int32_t rope_scaling_type_train;
|
||||
|
||||
float f_clamp_kqv;
|
||||
float f_max_alibi_bias;
|
||||
float f_clamp_kqv = 0.0f;
|
||||
float f_max_alibi_bias = 0.0f;
|
||||
|
||||
bool causal_attn = true;
|
||||
bool causal_attn = true;
|
||||
bool pooling_layer = false;
|
||||
|
||||
|
||||
|
@ -3053,6 +3053,11 @@ static void llm_load_hparams(
|
|||
case 40: model.type = e_model::MODEL_13B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
}
|
||||
|
||||
if (model.type == e_model::MODEL_13B) {
|
||||
// TODO: become GGUF KV parameter
|
||||
hparams.f_max_alibi_bias = 8.0f;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_STARCODER:
|
||||
{
|
||||
|
@ -3080,6 +3085,9 @@ static void llm_load_hparams(
|
|||
case 32: model.type = e_model::MODEL_1B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
}
|
||||
|
||||
// TODO: become GGUF KV parameter
|
||||
hparams.f_max_alibi_bias = 8.0f;
|
||||
} break;
|
||||
case LLM_ARCH_BERT:
|
||||
{
|
||||
|
@ -3125,11 +3133,12 @@ static void llm_load_hparams(
|
|||
case 4096: model.type = e_model::MODEL_7B; break;
|
||||
} break;
|
||||
}
|
||||
|
||||
// TODO: become GGUF KV parameter
|
||||
hparams.f_max_alibi_bias = 8.0f;
|
||||
} break;
|
||||
case LLM_ARCH_MPT:
|
||||
{
|
||||
hparams.f_clamp_kqv = 0.0f;
|
||||
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||
ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv, false);
|
||||
ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
|
||||
|
@ -5254,12 +5263,9 @@ struct llm_build_context {
|
|||
cb(Kcur, "Kcur", il);
|
||||
|
||||
|
||||
// apply ALiBi for 13B model
|
||||
const float max_alibi_bias = model.type == MODEL_13B ? 8.0f : -1.0f;
|
||||
|
||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||
model.layers[il].wo, NULL,
|
||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
cb(cur, "kqv_out", il);
|
||||
}
|
||||
|
||||
|
@ -5776,7 +5782,7 @@ struct llm_build_context {
|
|||
|
||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||
model.layers[il].wo, NULL,
|
||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, 8.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
cb(cur, "kqv_out", il);
|
||||
}
|
||||
|
||||
|
@ -6012,7 +6018,7 @@ struct llm_build_context {
|
|||
|
||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||
model.layers[il].wo, model.layers[il].bo,
|
||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, 8.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||
cb(cur, "kqv_out", il);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue