metal : f16 precision

2024-01-21 11:13:24 +02:00 · 2024-01-21 11:13:24 +02:00 · 528da7515e
commit 528da7515e
parent 1173f49c3b
2 changed files with 26 additions and 20 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -2237,8 +2237,10 @@ static bool ggml_metal_graph_compute(

                        const int nwarps = 1;

-                        GGML_ASSERT(2*32*nwarps*ne00*sizeof(float) <= ctx->device.maxThreadgroupMemoryLength);
-                        [encoder setThreadgroupMemoryLength:2*32*nwarps*ne00*sizeof(float) atIndex:0];
+                        const size_t shalf = sizeof(float)/2;
+
+                        GGML_ASSERT(2*32*nwarps*ne00*shalf <= ctx->device.maxThreadgroupMemoryLength);
+                        [encoder setThreadgroupMemoryLength:2*32*nwarps*ne00*shalf atIndex:0];

                        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 31)/32, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(32, 1, 1)];
                    } break;