metal : pad shared memory to 16 bytes

2024-04-03 15:25:47 +03:00 · 2024-04-03 15:25:47 +03:00 · 716e960a80
commit 716e960a80
parent a054283c0f
1 changed file with 4 additions and 1 deletion
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -2417,7 +2417,10 @@ static enum ggml_status ggml_metal_graph_compute(
                            ne00_padded *= 2;
                        }

-                        const int mem_size = ne00_padded*sizeof(int32_t);
+                        // Metal requires the threadgroup memory length to be a multiple of 16 bytes
+                        // https://developer.apple.com/documentation/metal/mtlcomputecommandencoder/1443142-setthreadgroupmemorylength
+                        const int mem_size = GGML_PAD(ne00_padded*sizeof(int32_t), 16);
+
                        id<MTLComputePipelineState> pipeline = nil;

                        switch (order) {