metal : faster q4_0 (#1775)
* metal : 8% faster q4_0 Avoid copying into local uchar4 and float4. * metal : 17% faster Q4_0 Use 64 threads in a thread group. --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent
72ff5282bf
commit
245fc3c37d
2 changed files with 20 additions and 16 deletions
|
@ -526,7 +526,7 @@ void ggml_metal_graph_compute(
|
|||
GGML_ASSERT(ne12 == 1);
|
||||
|
||||
nth0 = 8;
|
||||
nth1 = 4;
|
||||
nth1 = 8;
|
||||
[encoder setComputePipelineState:ctx->pipeline_mul_mat_q4_0_f32];
|
||||
} break;
|
||||
case GGML_TYPE_Q2_K:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue