From bd86c4c4df4469fcdcf41e1fd83215839c2c798b Mon Sep 17 00:00:00 2001
From: Junhee Yoo <junhee.yoo@navercorp.com>
Date: Wed, 23 Oct 2024 11:19:23 +0900
Subject: [PATCH] ggml-metal: cap IM2COL threadgroup width at MIN(N, M)

- Keep the original IM2COL kernel dispatch unchanged; in the _ext dispatch,
  launch MIN(N, M) threads per threadgroup instead of always M.

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>
---
 ggml/src/ggml-metal.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
index cc6f00475..584d626ca 100644
--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@@ -2625,7 +2625,7 @@ static void ggml_metal_encode_node(
                     [encoder setBytes:&KW       length:sizeof(int32_t) atIndex:15];
 
                     const int64_t D = N / M + (N % M > 0 ? 1 : 0);
-                    [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(M, 1, 1)];
+                    [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(MIN((uint64_t)N, M), 1, 1)];
                 } else {
                     [encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
                 }