wip : 4 rows per simd group
This commit is contained in:
parent
8cde449b8b
commit
f31955f5d1
2 changed files with 24 additions and 21 deletions
|
@ -2252,14 +2252,14 @@ static bool ggml_metal_graph_compute(
|
|||
[encoder setBytes:&ne3 length:sizeof( int64_t) atIndex:26];
|
||||
[encoder setBytes:&scale length:sizeof( float) atIndex:27];
|
||||
|
||||
const int64_t nwarps = 2;
|
||||
const int64_t nwarps = 4;
|
||||
|
||||
const size_t smem = nwarps*(2*8*nwarps*ne00 + 128)*(sizeof(float)/2);
|
||||
const size_t smem = nwarps*(2*4*ne00 + 128)*(sizeof(float)/2);
|
||||
|
||||
GGML_ASSERT(smem <= ctx->device.maxThreadgroupMemoryLength);
|
||||
[encoder setThreadgroupMemoryLength:smem atIndex:0];
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(ne01, (ne02 + 8*nwarps - 1)/(8*nwarps), ne03) threadsPerThreadgroup:MTLSizeMake(32*nwarps, 1, 1)];
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(ne01, (ne02 + 4*nwarps - 1)/(4*nwarps), ne03) threadsPerThreadgroup:MTLSizeMake(32*nwarps, 1, 1)];
|
||||
} break;
|
||||
case GGML_OP_DUP:
|
||||
case GGML_OP_CPY:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue