llama : reuse hparams.f_max_alibi_bias in all cases
ggml-ci
This commit is contained in:
parent
7e0c3778fb
commit
6ca762eccf
1 changed file with 17 additions and 11 deletions
28
llama.cpp
28
llama.cpp
|
@ -1557,10 +1557,10 @@ struct llama_hparams {
|
||||||
uint32_t n_yarn_orig_ctx;
|
uint32_t n_yarn_orig_ctx;
|
||||||
int32_t rope_scaling_type_train;
|
int32_t rope_scaling_type_train;
|
||||||
|
|
||||||
float f_clamp_kqv;
|
float f_clamp_kqv = 0.0f;
|
||||||
float f_max_alibi_bias;
|
float f_max_alibi_bias = 0.0f;
|
||||||
|
|
||||||
bool causal_attn = true;
|
bool causal_attn = true;
|
||||||
bool pooling_layer = false;
|
bool pooling_layer = false;
|
||||||
|
|
||||||
|
|
||||||
|
@ -3053,6 +3053,11 @@ static void llm_load_hparams(
|
||||||
case 40: model.type = e_model::MODEL_13B; break;
|
case 40: model.type = e_model::MODEL_13B; break;
|
||||||
default: model.type = e_model::MODEL_UNKNOWN;
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (model.type == e_model::MODEL_13B) {
|
||||||
|
// TODO: become GGUF KV parameter
|
||||||
|
hparams.f_max_alibi_bias = 8.0f;
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_STARCODER:
|
case LLM_ARCH_STARCODER:
|
||||||
{
|
{
|
||||||
|
@ -3080,6 +3085,9 @@ static void llm_load_hparams(
|
||||||
case 32: model.type = e_model::MODEL_1B; break;
|
case 32: model.type = e_model::MODEL_1B; break;
|
||||||
default: model.type = e_model::MODEL_UNKNOWN;
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: become GGUF KV parameter
|
||||||
|
hparams.f_max_alibi_bias = 8.0f;
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_BERT:
|
case LLM_ARCH_BERT:
|
||||||
{
|
{
|
||||||
|
@ -3125,11 +3133,12 @@ static void llm_load_hparams(
|
||||||
case 4096: model.type = e_model::MODEL_7B; break;
|
case 4096: model.type = e_model::MODEL_7B; break;
|
||||||
} break;
|
} break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: become GGUF KV parameter
|
||||||
|
hparams.f_max_alibi_bias = 8.0f;
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_MPT:
|
case LLM_ARCH_MPT:
|
||||||
{
|
{
|
||||||
hparams.f_clamp_kqv = 0.0f;
|
|
||||||
|
|
||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||||
ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv, false);
|
ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv, false);
|
||||||
ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
|
ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
|
||||||
|
@ -5254,12 +5263,9 @@ struct llm_build_context {
|
||||||
cb(Kcur, "Kcur", il);
|
cb(Kcur, "Kcur", il);
|
||||||
|
|
||||||
|
|
||||||
// apply ALiBi for 13B model
|
|
||||||
const float max_alibi_bias = model.type == MODEL_13B ? 8.0f : -1.0f;
|
|
||||||
|
|
||||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||||
model.layers[il].wo, NULL,
|
model.layers[il].wo, NULL,
|
||||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||||
cb(cur, "kqv_out", il);
|
cb(cur, "kqv_out", il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5776,7 +5782,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||||
model.layers[il].wo, NULL,
|
model.layers[il].wo, NULL,
|
||||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, 8.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||||
cb(cur, "kqv_out", il);
|
cb(cur, "kqv_out", il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6012,7 +6018,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
|
||||||
model.layers[il].wo, model.layers[il].bo,
|
model.layers[il].wo, model.layers[il].bo,
|
||||||
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, 8.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
|
||||||
cb(cur, "kqv_out", il);
|
cb(cur, "kqv_out", il);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue