metal : trying bs = 512 performance (wip)

This commit is contained in:
Georgi Gerganov 2024-02-12 19:21:57 +02:00
parent e8b00e2941
commit 5a668ea000
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 44 additions and 15 deletions

View file

@@ -1301,7 +1301,7 @@ static bool ggml_metal_graph_compute(
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
if (src1t == GGML_TYPE_F32 && ne11 <= 8) {
if (src1t == GGML_TYPE_F32) {
id<MTLComputePipelineState> pipeline = nil;
switch (src0->type) {
@@ -1340,12 +1340,12 @@ static bool ggml_metal_graph_compute(
[encoder setBytes:&r2 length:sizeof(r2) atIndex:13];
[encoder setBytes:&r3 length:sizeof(r3) atIndex:14];
const int nsg = 8;
const int nsg = 4;
const int nsg0 = 1;
const int nsh0 = 16;
const int nsg1 = 1;
const int nsh1 = 64;
const int nsg0 = 4;
const int nsh0 = 4;
const int nsg1 = 2;
const int nsh1 = 4;
GGML_ASSERT(ne00 % 4 == 0); // for zeroing shared memory with half4 / float4
//GGML_ASSERT(ne00 % 16 == 0); // dequantize in chunks of 16