metal : print more GPU info + disable mul_mm for MTLGPUFamily < Apple7

This commit is contained in:
Georgi Gerganov 2023-10-08 09:53:38 +03:00
parent 545b03491c
commit 6b9554a740
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 65 additions and 42 deletions

View file

@ -2332,7 +2332,7 @@ kernel void kernel_get_rows(
}
#define BLOCK_SIZE_M 64 // 8 simdgroup matrices from matrix A
#define BLOCK_SIZE_N 32 // 4 simdgroup matrices from matrix A
#define BLOCK_SIZE_N 32 // 4 simdgroup matrices from matrix B
#define BLOCK_SIZE_K 32
#define THREAD_MAT_M 4 // each thread take 4 simdgroup matrices from matrix A
#define THREAD_MAT_N 2 // each thread take 2 simdgroup matrices from matrix B
@ -2459,7 +2459,8 @@ kernel void kernel_mul_mm(device const uchar * src0,
}
threadgroup_barrier(mem_flags::mem_threadgroup);
device float * C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
device float * C = dst + (BLOCK_SIZE_M * r0) + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
if (sgitg == 0) {
for (int i = 0; i < n_rows; i++) {
for (int j = tiitg; j < n_cols; j += BLOCK_SIZE_N) {