llama : reuse hparams.f_max_alibi_bias in all cases

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-02-14 13:54:55 +02:00
parent 7e0c3778fb
commit 6ca762eccf
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@@ -1557,10 +1557,10 @@ struct llama_hparams {
uint32_t n_yarn_orig_ctx; uint32_t n_yarn_orig_ctx;
int32_t rope_scaling_type_train; int32_t rope_scaling_type_train;
float f_clamp_kqv; float f_clamp_kqv = 0.0f;
float f_max_alibi_bias; float f_max_alibi_bias = 0.0f;
bool causal_attn = true; bool causal_attn = true;
bool pooling_layer = false; bool pooling_layer = false;
@@ -3053,6 +3053,11 @@ static void llm_load_hparams(
case 40: model.type = e_model::MODEL_13B; break; case 40: model.type = e_model::MODEL_13B; break;
default: model.type = e_model::MODEL_UNKNOWN; default: model.type = e_model::MODEL_UNKNOWN;
} }
if (model.type == e_model::MODEL_13B) {
// TODO: become GGUF KV parameter
hparams.f_max_alibi_bias = 8.0f;
}
} break; } break;
case LLM_ARCH_STARCODER: case LLM_ARCH_STARCODER:
{ {
@@ -3080,6 +3085,9 @@ static void llm_load_hparams(
case 32: model.type = e_model::MODEL_1B; break; case 32: model.type = e_model::MODEL_1B; break;
default: model.type = e_model::MODEL_UNKNOWN; default: model.type = e_model::MODEL_UNKNOWN;
} }
// TODO: become GGUF KV parameter
hparams.f_max_alibi_bias = 8.0f;
} break; } break;
case LLM_ARCH_BERT: case LLM_ARCH_BERT:
{ {
@@ -3125,11 +3133,12 @@ static void llm_load_hparams(
case 4096: model.type = e_model::MODEL_7B; break; case 4096: model.type = e_model::MODEL_7B; break;
} break; } break;
} }
// TODO: become GGUF KV parameter
hparams.f_max_alibi_bias = 8.0f;
} break; } break;
case LLM_ARCH_MPT: case LLM_ARCH_MPT:
{ {
hparams.f_clamp_kqv = 0.0f;
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv, false); ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv, false);
ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias); ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
@@ -5254,12 +5263,9 @@ struct llm_build_context {
cb(Kcur, "Kcur", il); cb(Kcur, "Kcur", il);
// apply ALiBi for 13B model
const float max_alibi_bias = model.type == MODEL_13B ? 8.0f : -1.0f;
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
model.layers[il].wo, NULL, model.layers[il].wo, NULL,
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il); Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
cb(cur, "kqv_out", il); cb(cur, "kqv_out", il);
} }
@@ -5776,7 +5782,7 @@ struct llm_build_context {
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
model.layers[il].wo, NULL, model.layers[il].wo, NULL,
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, 8.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
cb(cur, "kqv_out", il); cb(cur, "kqv_out", il);
} }
@@ -6012,7 +6018,7 @@ struct llm_build_context {
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
model.layers[il].wo, model.layers[il].bo, model.layers[il].wo, model.layers[il].bo,
Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, 8.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, hparams.f_max_alibi_bias, 1.0f/sqrtf(float(n_embd_head)), cb, il);
cb(cur, "kqv_out", il); cb(cur, "kqv_out", il);
} }