From bd86c4c4df4469fcdcf41e1fd83215839c2c798b Mon Sep 17 00:00:00 2001 From: Junhee Yoo Date: Wed, 23 Oct 2024 11:19:23 +0900 Subject: [PATCH] apply more optimization - original IM2COL kernel + _ext with MIN() Signed-off-by: Junhee Yoo --- ggml/src/ggml-metal.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m index cc6f00475..584d626ca 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m @@ -2625,7 +2625,7 @@ static void ggml_metal_encode_node( [encoder setBytes:&KW length:sizeof(int32_t) atIndex:15]; const int64_t D = N / M + (N % M > 0 ? 1 : 0); - [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(M, 1, 1)]; + [encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(MIN((uint64_t)N, M), 1, 1)]; } else { [encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)]; }