metal : add PoC for normalized Q4_0 and Q4_1

2023-08-30 18:32:43 +03:00 · 2023-08-30 18:32:43 +03:00 · b4e70822f6
commit b4e70822f6
parent 9ffe54ed10
2 changed files with 45 additions and 29 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@ -697,6 +697,9 @@ void ggml_metal_graph_compute(
                        } break;
                    case GGML_OP_MUL:
                        {
+                            GGML_ASSERT(ne00 % 4 == 0);
+                            const int64_t nb = ne00/4;
+
                            if (ggml_nelements(src1) == ne10) {
                                // src1 is a row
                                [encoder setComputePipelineState:ctx->pipeline_mul_row];
@ -706,9 +709,9 @@ void ggml_metal_graph_compute(
                            [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                            [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
                            [encoder setBuffer:id_dst  offset:offs_dst  atIndex:2];
-                            [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
+                            [encoder setBytes:&nb     length:sizeof(nb) atIndex:3];

-                            const int64_t n = ggml_nelements(dst);
+                            const int64_t n = ggml_nelements(dst)/4;

                            [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                        } break;