metal : optimize softmax

This commit is contained in:
Georgi Gerganov 2024-02-01 20:53:29 +02:00
parent 56e45a239e
commit cda5a60a41
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 22 additions and 17 deletions

View file

@@ -2285,8 +2285,9 @@ static bool ggml_metal_graph_compute(
const int64_t nqptg = 8; // queries per threadgroup !! sync with kernel template arguments !!
const int64_t ncpsg = 32; // cache values per simdgroup !! sync with kernel template arguments !!
-GGML_ASSERT(nqptg % 8 == 0);
-GGML_ASSERT(ncpsg % 32 == 0);
+GGML_ASSERT(nqptg <= 32);
+GGML_ASSERT(nqptg % 8 == 0);
+GGML_ASSERT(ncpsg % 32 == 0);
// simdgroups per threadgroup (a.k.a. warps)
// for small batches use more simdgroups (needs more tests, to confirm if it's worth it)