apply more optimization
- original IM2COL kernel + _ext with MIN()

Signed-off-by: Junhee Yoo <junhee.yoo@navercorp.com>
This commit is contained in:
parent
0084847991
commit
bd86c4c4df
1 changed file with 1 addition and 1 deletion
|
@@ -2625,7 +2625,7 @@ static void ggml_metal_encode_node(
|
|||
[encoder setBytes:&KW length:sizeof(int32_t) atIndex:15];
|
||||
|
||||
const int64_t D = N / M + (N % M > 0 ? 1 : 0);
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(M, 1, 1)];
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(D * CHW, OH, OW) threadsPerThreadgroup:MTLSizeMake(MIN((uint64_t)N, M), 1, 1)];
|
||||
} else {
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(IC, OH, OW) threadsPerThreadgroup:MTLSizeMake(N, KH, KW)];
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue