cuda : increase C to 128 for better performance

2024-02-01 16:12:56 +02:00 · 2024-02-01 16:12:56 +02:00 · ac26f27028
commit ac26f27028
parent 9a5c2a1681
4 changed files with 37 additions and 29 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -5089,7 +5089,7 @@ static struct ggml_tensor * ggml_soft_max_impl(
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(mask->ne[2] == 1);
        GGML_ASSERT(mask->ne[3] == 1);
-        GGML_ASSERT(ggml_can_repeat_rows(mask, a));
+        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
    }

    bool is_node = false;