metal : pad shared memory to 16 bytes

This commit is contained in:
Georgi Gerganov 2024-04-03 15:25:47 +03:00
parent a054283c0f
commit 716e960a80
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@ -2417,7 +2417,10 @@ static enum ggml_status ggml_metal_graph_compute(
ne00_padded *= 2;
}
const int mem_size = ne00_padded*sizeof(int32_t);
// Metal kernels require the buffer size to be a multiple of 16 bytes
// https://developer.apple.com/documentation/metal/mtlcomputecommandencoder/1443142-setthreadgroupmemorylength
const int mem_size = GGML_PAD(ne00_padded*sizeof(int32_t), 16);
id<MTLComputePipelineState> pipeline = nil;
switch (order) {