apply more optimization

- original IM2COL kernel + _ext with MIN()

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>
2024-10-23 11:19:23 +09:00 · 2024-10-23 11:19:23 +09:00 · bd86c4c4df
commit bd86c4c4df
parent 0084847991
1 changed file with 1 addition and 1 deletion
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -2625,7 +2625,7 @@ static void ggml_metal_encode_node(
                    [encoder setBytes:&KW       length:sizeof(int32_t) atIndex:15];

                    const int64_t D = N / M + (N % M > 0 ? 1 : 0);
-                    [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(M, 1, 1)];
+                    [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(MIN((uint64_t)N, M), 1, 1)];
                } else {
                    [encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
                }