llama : adapt to F16 KQ_pos
This commit is contained in:
parent
31109ca00a
commit
f249c997a8
4 changed files with 13 additions and 8 deletions
ggml.c | 2 +-
@@ -5192,7 +5192,7 @@ static struct ggml_tensor * ggml_soft_max_impl(
        GGML_ASSERT(mask->type == GGML_TYPE_F16);
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(ggml_is_matrix(mask));
        GGML_ASSERT(ggml_can_repeat_rows(mask, a));
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
    }

    if (pos) {
|
Loading…
Add table
Add a link
Reference in a new issue