metal : restore original F16 mat-vec multiplication

It works after the norm fixes
2023-09-07 15:20:07 +03:00 · 2023-09-07 15:20:07 +03:00 · 783379670a
commit 783379670a
parent ed92c3d4b2
1 changed file with 2 additions and 6 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -995,12 +995,8 @@ void ggml_metal_graph_compute(
                                else if (src0t == GGML_TYPE_Q6_K) {
                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                } else {
-                                    [encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
+                                    int64_t ny = (ne11 + 3)/4;
-                                    [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    [encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
-                                    // TODO: this breaks for Q4_0 - understand why and fix it
-                                    //int64_t ny = (ne11 + 3)/4;
-                                    //[encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                }
                            }
                        } break;