From 11c7b1e25a506676768f38b3672e7e2191cec9bc Mon Sep 17 00:00:00 2001
From: "zhou.weiguo"
Date: Fri, 14 Jun 2024 23:04:13 +0800
Subject: [PATCH] review: modify codes as review suggestion

---
 examples/benchmark/benchmark-matmult.cpp | 48 +++++-------------------
 1 file changed, 10 insertions(+), 38 deletions(-)

diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 66c0b5443..7c8b6a639 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -32,26 +32,11 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
     ggml_graph_compute(graph, &plan);
 }
 
-#define QK8_0 32
-
-typedef struct {
-    uint16_t d;       // delta
-    int8_t qs[QK8_0]; // quants
-} block_q8_0;
-
 static float tensor_sum_elements(const ggml_tensor * tensor) {
     double sum = 0;
     float floatvalue = 0;
     unsigned short shortvalue = 0;
 
-    if (tensor->type == GGML_TYPE_F32) {
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
-            }
-        }
-    }
-
     if (tensor->type == GGML_TYPE_I8) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
@@ -70,34 +55,21 @@ static float tensor_sum_elements(const ggml_tensor * tensor) {
         }
     }
 
-    if (tensor->type == GGML_TYPE_Q8_0) {
-        block_q8_0 * quant_datas = (block_q8_0 *)tensor->data;
-#if 1
-        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
-        float * float32 = (float*)malloc((tensor->ne[0] * tensor->ne[1]) * sizeof(float));
-        if (NULL == float32) {
-            printf("malloc failed\n");
-            return 0.0;
-        }
-        qtype.to_float(quant_datas, float32, tensor->ne[0] * tensor->ne[1]);
+    if (tensor->type == GGML_TYPE_F32) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += float32[j * tensor->ne[0] + k];
+                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
             }
         }
-        free(float32);
-#else
-        int blocks = 0;
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            blocks = tensor->ne[0] / QK8_0;
-            for (int i = 0; i < blocks; i++) {
-                floatvalue = GGML_FP16_TO_FP32(quant_datas[j * blocks + i].d);
-                for (int k = 0; k < QK8_0; k++) {
-                    sum += (quant_datas[j * blocks + i].qs[k] * floatvalue);
-                }
-            }
+    }
+
+    if (ggml_is_quantized(tensor->type)) {
+        std::vector<float> f32out(ggml_nelements(tensor));
+        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
+        qtype.to_float((void *)tensor->data, f32out.data(), f32out.size());
+        for (const float & value : f32out) {
+            sum += value;
         }
-#endif
     }
 
     return sum;
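
Note on the approach: the new ggml_is_quantized branch replaces the hand-rolled
block_q8_0 decoding with ggml's own type traits, which can dequantize any
supported quantized type into a temporary f32 buffer before summing. Below is a
minimal standalone sketch of the same pattern, assuming the ggml API as of this
tree (ggml_internal_get_type_traits, ggml_type_traits_t) and contiguous host
tensor data; the helper name and main() are illustrative, not part of the patch.

    #include "ggml.h"

    #include <cstdio>
    #include <vector>

    // Sum all elements of a tensor, dequantizing through the type traits
    // when the tensor holds a quantized type.
    static double sum_tensor_elements(const ggml_tensor * tensor) {
        double sum = 0;
        if (tensor->type == GGML_TYPE_F32) {
            const float * data = (const float *) tensor->data;
            for (int64_t i = 0; i < ggml_nelements(tensor); i++) {
                sum += data[i];
            }
        } else if (ggml_is_quantized(tensor->type)) {
            // Dequantize the whole tensor into a temporary f32 buffer.
            std::vector<float> f32out(ggml_nelements(tensor));
            ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
            qtype.to_float(tensor->data, f32out.data(), (int64_t) f32out.size());
            for (const float & value : f32out) {
                sum += value;
            }
        }
        return sum;
    }

    int main() {
        // Build a small f32 tensor just to exercise the helper.
        ggml_init_params params = { 16 * 1024 * 1024, nullptr, false };
        ggml_context * ctx = ggml_init(params);
        ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32, 4);
        float * data = (float *) t->data;
        for (int64_t i = 0; i < ggml_nelements(t); i++) {
            data[i] = 1.0f;
        }
        printf("sum = %f\n", sum_tensor_elements(t)); // expect 128.000000
        ggml_free(ctx);
        return 0;
    }

Using the traits keeps the benchmark independent of any one block layout (the
old code only handled Q8_0 and duplicated its block definition), at the cost of
one temporary f32 buffer the size of the tensor.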