From 11c7b1e25a506676768f38b3672e7e2191cec9bc Mon Sep 17 00:00:00 2001
From: "zhou.weiguo"
Date: Fri, 14 Jun 2024 23:04:13 +0800
Subject: [PATCH] review: modify codes as review suggestion

---
 examples/benchmark/benchmark-matmult.cpp | 48 +++++-------------------
 1 file changed, 10 insertions(+), 38 deletions(-)

diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 66c0b5443..7c8b6a639 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -32,26 +32,11 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
     ggml_graph_compute(graph, &plan);
 }
 
-#define QK8_0 32
-
-typedef struct {
-    uint16_t d;       // delta
-    int8_t qs[QK8_0]; // quants
-} block_q8_0;
-
 static float tensor_sum_elements(const ggml_tensor * tensor) {
     double sum = 0;
     float floatvalue = 0;
     unsigned short shortvalue = 0;
 
-    if (tensor->type == GGML_TYPE_F32) {
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
-            }
-        }
-    }
-
     if (tensor->type == GGML_TYPE_I8) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
@@ -70,34 +55,21 @@ static float tensor_sum_elements(const ggml_tensor * tensor) {
         }
     }
 
-    if (tensor->type == GGML_TYPE_Q8_0) {
-        block_q8_0 * quant_datas = (block_q8_0 *)tensor->data;
-#if 1
-        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
-        float * float32 = (float*)malloc((tensor->ne[0] * tensor->ne[1]) * sizeof(float));
-        if (NULL == float32) {
-            printf("malloc failed\n");
-            return 0.0;
-        }
-        qtype.to_float(quant_datas, float32, tensor->ne[0] * tensor->ne[1]);
+    if (tensor->type == GGML_TYPE_F32) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += float32[j * tensor->ne[0] + k];
+                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
             }
         }
-        free(float32);
-#else
-        int blocks = 0;
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            blocks = tensor->ne[0] / QK8_0;
-            for (int i = 0; i < blocks; i++) {
-                floatvalue = GGML_FP16_TO_FP32(quant_datas[j * blocks + i].d);
-                for (int k = 0; k < QK8_0; k++) {
-                    sum += (quant_datas[j * blocks + i].qs[k] * floatvalue);
-                }
-            }
+    }
+
+    if (ggml_is_quantized(tensor->type)) {
+        std::vector<float> f32out(ggml_nelements(tensor));
+        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
+        qtype.to_float((void *)tensor->data, f32out.data(), f32out.size());
+        for (const float & value : f32out) {
+            sum += value;
         }
-#endif
     }
 
     return sum;
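
Note on the approach: the new ggml_is_quantized branch replaces the hand-rolled
block_q8_0 decoding with ggml's own type traits, which can dequantize any
supported quantized type into a temporary f32 buffer before summing. Below is a
minimal standalone sketch of the same pattern, assuming the ggml API as of this
tree (ggml_internal_get_type_traits, ggml_type_traits_t) and contiguous host
tensor data; the helper name and main() are illustrative, not part of the patch.

    #include "ggml.h"

    #include <cstdio>
    #include <vector>

    // Sum all elements of a tensor, dequantizing through the type traits
    // when the tensor holds a quantized type.
    static double sum_tensor_elements(const ggml_tensor * tensor) {
        double sum = 0;
        if (tensor->type == GGML_TYPE_F32) {
            const float * data = (const float *) tensor->data;
            for (int64_t i = 0; i < ggml_nelements(tensor); i++) {
                sum += data[i];
            }
        } else if (ggml_is_quantized(tensor->type)) {
            // Dequantize the whole tensor into a temporary f32 buffer.
            std::vector<float> f32out(ggml_nelements(tensor));
            ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
            qtype.to_float(tensor->data, f32out.data(), (int64_t) f32out.size());
            for (const float & value : f32out) {
                sum += value;
            }
        }
        return sum;
    }

    int main() {
        // Build a small f32 tensor just to exercise the helper.
        ggml_init_params params = { 16 * 1024 * 1024, nullptr, false };
        ggml_context * ctx = ggml_init(params);
        ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32, 4);
        float * data = (float *) t->data;
        for (int64_t i = 0; i < ggml_nelements(t); i++) {
            data[i] = 1.0f;
        }
        printf("sum = %f\n", sum_tensor_elements(t)); // expect 128.000000
        ggml_free(ctx);
        return 0;
    }

Using the traits keeps the benchmark independent of any one block layout (the
old code only handled Q8_0 and duplicated its block definition), at the cost of
one temporary f32 buffer the size of the tensor.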