metal : optimize softmax

2024-02-01 20:53:29 +02:00 · 2024-02-01 20:53:29 +02:00 · cda5a60a41
commit cda5a60a41
parent 56e45a239e
2 changed files with 22 additions and 17 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -2285,8 +2285,9 @@ static bool ggml_metal_graph_compute(
                        const int64_t nqptg = 8;  // queries per threadgroup    !! sync with kernel template arguments !!
                        const int64_t ncpsg = 32; // cache values per simdgroup !! sync with kernel template arguments !!

-                        GGML_ASSERT(nqptg % 8  == 0);
-                        GGML_ASSERT(ncpsg % 32 == 0);
+                        GGML_ASSERT(nqptg <= 32);
+                        GGML_ASSERT(nqptg  % 8  == 0);
+                        GGML_ASSERT(ncpsg  % 32 == 0);

                        // simdgroups per threadgroup (a.k.a. warps)
                        // for small batches use more simdgroups (needs more tests, to confirm if it's worth it)