metal : use mm kernels for batch size > 2
This commit is contained in:
parent
e9463792d3
commit
4c72ab13b2
1 changed files with 1 additions and 1 deletions
|
@ -958,7 +958,7 @@ void ggml_metal_graph_compute(
|
||||||
src1t == GGML_TYPE_F32 &&
|
src1t == GGML_TYPE_F32 &&
|
||||||
[ctx->device supportsFamily:MTLGPUFamilyApple7] &&
|
[ctx->device supportsFamily:MTLGPUFamilyApple7] &&
|
||||||
ne00%32 == 0 &&
|
ne00%32 == 0 &&
|
||||||
ne11 > 1) {
|
ne11 > 2) {
|
||||||
switch (src0->type) {
|
switch (src0->type) {
|
||||||
case GGML_TYPE_F32: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f32_f32]; break;
|
case GGML_TYPE_F32: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f32_f32]; break;
|
||||||
case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32]; break;
|
case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32]; break;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue