metal : add PoC for normalized Q4_0 and Q4_1

This commit is contained in:
Georgi Gerganov 2023-08-30 18:32:43 +03:00
parent 9ffe54ed10
commit b4e70822f6
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 45 additions and 29 deletions

View file

@ -697,6 +697,9 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_MUL:
{
GGML_ASSERT(ne00 % 4 == 0);
const int64_t nb = ne00/4;
if (ggml_nelements(src1) == ne10) {
// src1 is a row
[encoder setComputePipelineState:ctx->pipeline_mul_row];
@ -706,9 +709,9 @@ void ggml_metal_graph_compute(
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
[encoder setBytes:&nb length:sizeof(nb) atIndex:3];
const int64_t n = ggml_nelements(dst);
const int64_t n = ggml_nelements(dst)/4;
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;