ggml : testing GPU FP precision via quantized CPY

2023-12-30 13:22:57 +02:00 · 2023-12-30 13:22:57 +02:00 · f64e4f04e7
commit f64e4f04e7
parent 24a447e20a
3 changed files with 23 additions and 7 deletions
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1103,6 +1103,8 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
    }
 }

+#include <stdio.h>
+
 void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
    static const int qk = QK4_1;

@@ -1110,6 +1112,9 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int

    const int nb = k / qk;

+    printf("d = %9f\n", GGML_FP16_TO_FP32(x[0].d));
+    printf("m = %9f\n", GGML_FP16_TO_FP32(x[0].m));
+
    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);
        const float m = GGML_FP16_TO_FP32(x[i].m);