llama : adapt to F16 KQ_pos
This commit is contained in:
parent
31109ca00a
commit
f249c997a8
4 changed files with 13 additions and 8 deletions
|
@@ -1505,7 +1505,7 @@ struct test_attn : public test_case {
         struct ggml_tensor * cur;

         cur = ggml_mul_mat (ctx, k, q);
-        cur = ggml_soft_max_ext(ctx, cur, mask, 1.0f/sqrtf(hs));
+        cur = ggml_soft_max_ext(ctx, cur, mask, nullptr, 1.0f/sqrtf(hs), 0.0f);
         cur = ggml_mul_mat (ctx, v, cur);
         cur = ggml_permute (ctx, cur, 0, 2, 1, 3);
         cur = ggml_cont_2d (ctx, cur, hs*nh, nb);
|
Loading…
Add table
Add a link
Reference in a new issue