Better use 32 thread groups for f16 x f32

This commit is contained in:
Iwan Kawrakow 2023-09-01 10:44:13 +03:00
parent af226bd26e
commit cad50d1971

View file

@@ -840,7 +840,7 @@ void ggml_metal_graph_compute(
switch (src0t) { switch (src0t) {
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
nth0 = 64; nth0 = 32;
nth1 = 1; nth1 = 1;
[encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32]; [encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32];
} break; } break;