diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 8dc8bd4da..66c0b5443 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -71,8 +71,23 @@ static float tensor_sum_elements(const ggml_tensor * tensor) { } if (tensor->type == GGML_TYPE_Q8_0) { - int blocks = 0; block_q8_0 * quant_datas = (block_q8_0 *)tensor->data; +#if 1 + ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type); + float * float32 = (float*)malloc((tensor->ne[0] * tensor->ne[1]) * sizeof(float)); + if (NULL == float32) { + printf("malloc failed\n"); + return 0.0; + } + qtype.to_float(quant_datas, float32, tensor->ne[0] * tensor->ne[1]); + for (int j = 0; j < tensor->ne[1]; j++) { + for (int k = 0; k < tensor->ne[0]; k++) { + sum += float32[j * tensor->ne[0] + k]; + } + } + free(float32); +#else + int blocks = 0; for (int j = 0; j < tensor->ne[1]; j++) { blocks = tensor->ne[0] / QK8_0; for (int i = 0; i < blocks; i++) { @@ -82,6 +97,7 @@ static float tensor_sum_elements(const ggml_tensor * tensor) { } } } +#endif } return sum;