From 47ae9b8f34ac6ac655ea2b9807f4f7ce6fc63abd Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Sat, 6 Jan 2024 06:32:37 +0100 Subject: [PATCH] iq2_xxs: fix MoE on Metal --- ggml-metal.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml-metal.m b/ggml-metal.m index 43536e5f4..6c2a8d04e 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -1980,9 +1980,9 @@ bool ggml_metal_graph_compute( src2t == GGML_TYPE_Q2_K) { // || src2t == GGML_TYPE_Q4_K) { [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)]; } - else if (src0t == GGML_TYPE_IQ2_XXS) { + else if (src2t == GGML_TYPE_IQ2_XXS) { [encoder setThreadgroupMemoryLength:(256*8+128) atIndex:0]; - [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7)/8, ne11, ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)]; + [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)]; } else if (src2t == GGML_TYPE_Q4_K) { [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 3)/4, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];