metal : works with ne00 % 4 == 0

This commit is contained in:
Georgi Gerganov 2024-02-08 13:26:50 +02:00
parent e68e32548f
commit 845876d012
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 31 additions and 18 deletions

View file

@@ -1348,7 +1348,7 @@ static bool ggml_metal_graph_compute(
const int nsh1 = 64;
GGML_ASSERT(ne00 % 4 == 0); // for zeroing shared memory with half4 / float4
- GGML_ASSERT(ne00 % 16 == 0); // dequantize in chunks of 16
+ //GGML_ASSERT(ne00 % 16 == 0); // dequantize in chunks of 16
GGML_ASSERT(nsh0 % 2 == 0); // dequantize in chunks of 2x8 = 16
GGML_ASSERT(nsh1 % nsh0 == 0);
GGML_ASSERT(nsh0 >= 2*nsg1); // need enough memory to store the results in f32