From 716e960a808e525ecec45fc701cb7fbe607de06a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 3 Apr 2024 15:25:47 +0300 Subject: [PATCH] metal : pad shared memory to 16 bytes --- ggml-metal.m | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ggml-metal.m b/ggml-metal.m index 51a5fab3a..419d8b9e5 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -2417,7 +2417,10 @@ static enum ggml_status ggml_metal_graph_compute( ne00_padded *= 2; } - const int mem_size = ne00_padded*sizeof(int32_t); + // Metal kernels require the threadgroup memory size to be a multiple of 16 bytes + // https://developer.apple.com/documentation/metal/mtlcomputecommandencoder/1443142-setthreadgroupmemorylength + const int mem_size = GGML_PAD(ne00_padded*sizeof(int32_t), 16); + id pipeline = nil; switch (order) {