fix: add some changes as per review
This commit is contained in:
parent
f8d1709061
commit
da96368535
2 changed files with 9 additions and 6 deletions
8
ggml.c
8
ggml.c
|
@ -12436,13 +12436,13 @@ static void ggml_compute_forward_soft_max_f32(
|
|||
|
||||
if (use_f16) {
|
||||
for (int i = 0; i < nc; ++i) {
|
||||
wp[i] += slope*GGML_FP16_TO_FP32(pos_f16[i]);
|
||||
//wp[i] = wp[i] - slope*abs(i1%nc - i);
|
||||
//wp[i] -= slope*GGML_FP16_TO_FP32(pos_f16[i]);
|
||||
wp[i] -= slope*abs(i1%nc - i);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < nc; ++i) {
|
||||
wp[i] += slope*pos_f32[i];
|
||||
//wp[i] = wp[i] - slope*abs(i1%nc - i);
|
||||
//wp[i] -= slope*pos_f32[i];
|
||||
wp[i] -= slope*abs(i1%nc - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8254,6 +8254,9 @@ struct llm_build_context {
|
|||
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
|
||||
struct ggml_tensor * KQ_mask = build_inp_KQ_mask(false);
|
||||
|
||||
// positions of the tokens in the KV cache
|
||||
struct ggml_tensor * KQ_pos = build_inp_KQ_pos(false);
|
||||
|
||||
// iterate layers
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
struct ggml_tensor * cur = inpL;
|
||||
|
@ -8322,7 +8325,7 @@ struct llm_build_context {
|
|||
struct ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);
|
||||
cb(kq, "kq", il);
|
||||
|
||||
kq = ggml_soft_max_ext(ctx0, kq, KQ_mask, nullptr, 1.0f/sqrtf(float(n_embd_head)), hparams.f_max_alibi_bias);
|
||||
kq = ggml_soft_max_ext(ctx0, kq, KQ_mask, KQ_pos, 1.0f/sqrtf(float(n_embd_head)), hparams.f_max_alibi_bias);
|
||||
cb(kq, "kq_soft_max_ext", il);
|
||||
|
||||
struct ggml_tensor * v = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcur, n_embd_gqa, n_tokens)));
|
||||
|
@ -11523,7 +11526,7 @@ static int llama_decode_internal(
|
|||
}
|
||||
|
||||
// non-causal masks do not use the KV cache
|
||||
if (hparams.causal_attn) {
|
||||
if (hparams.causal_attn || model.arch == LLM_ARCH_JINA_BERT_V2) {
|
||||
llama_kv_cache_update(&lctx);
|
||||
|
||||
// if we have enough unused cells before the current head ->
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue