ggml : poc for normalizing weights for better quantization (metal)

This commit is contained in:
Georgi Gerganov 2023-08-30 19:05:36 +03:00
parent b532a69b2f
commit 253eab8ae1
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
5 changed files with 295 additions and 139 deletions

View file

@@ -697,6 +697,9 @@ void ggml_metal_graph_compute(
} break;
case GGML_OP_MUL:
{
+GGML_ASSERT(ne00 % 4 == 0);
+const int64_t nb = ne00/4;
if (ggml_nelements(src1) == ne10) {
// src1 is a row
[encoder setComputePipelineState:ctx->pipeline_mul_row];
@@ -706,9 +709,9 @@ void ggml_metal_graph_compute(
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
-[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
+[encoder setBytes:&nb length:sizeof(nb) atIndex:3];
-const int64_t n = ggml_nelements(dst);
+const int64_t n = ggml_nelements(dst)/4;
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;