review: modify code as per review suggestions
commit 11c7b1e25a
parent fe59684e32
1 changed file with 10 additions and 38 deletions
@@ -32,26 +32,11 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
     ggml_graph_compute(graph, &plan);
 }
 
-#define QK8_0 32
-
-typedef struct {
-    uint16_t d; // delta
-    int8_t qs[QK8_0]; // quants
-} block_q8_0;
-
 static float tensor_sum_elements(const ggml_tensor * tensor) {
     double sum = 0;
     float floatvalue = 0;
     unsigned short shortvalue = 0;
 
-    if (tensor->type == GGML_TYPE_F32) {
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
-            }
-        }
-    }
-
     if (tensor->type == GGML_TYPE_I8) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
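Review note: the deleted block_q8_0 mirror duplicated ggml's internal Q8_0 definition, and the F32 branch is not removed outright; it moves below the I8 branch in the second hunk. For context, here is a minimal sketch of the Q8_0 layout and the per-block decode rule the old hand-rolled path relied on. The dequantize_q8_0_block helper is hypothetical, added only to illustrate x[k] = GGML_FP16_TO_FP32(d) * qs[k], and it assumes GGML_FP16_TO_FP32 from ggml's headers:

#include <stdint.h>

#define QK8_0 32

typedef struct {
    uint16_t d;       // per-block scale ("delta"), stored as fp16 bits
    int8_t qs[QK8_0]; // 32 signed 8-bit quants
} block_q8_0;

// Hypothetical helper: decode one Q8_0 block. Each element decodes as the
// block's fp16 delta times the signed 8-bit quant.
static void dequantize_q8_0_block(const block_q8_0 * b, float * out) {
    const float d = GGML_FP16_TO_FP32(b->d); // fp16 bits -> float
    for (int k = 0; k < QK8_0; k++) {
        out[k] = d * (float) b->qs[k];
    }
}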
@@ -70,34 +55,21 @@ static float tensor_sum_elements(const ggml_tensor * tensor) {
         }
     }
 
-    if (tensor->type == GGML_TYPE_Q8_0) {
-        block_q8_0 * quant_datas = (block_q8_0 *)tensor->data;
-#if 1
-        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
-        float * float32 = (float*)malloc((tensor->ne[0] * tensor->ne[1]) * sizeof(float));
-        if (NULL == float32) {
-            printf("malloc failed\n");
-            return 0.0;
-        }
-        qtype.to_float(quant_datas, float32, tensor->ne[0] * tensor->ne[1]);
+    if (tensor->type == GGML_TYPE_F32) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += float32[j * tensor->ne[0] + k];
+                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
             }
         }
-        free(float32);
-#else
-        int blocks = 0;
-        for (int j = 0; j < tensor->ne[1]; j++) {
-            blocks = tensor->ne[0] / QK8_0;
-            for (int i = 0; i < blocks; i++) {
-                floatvalue = GGML_FP16_TO_FP32(quant_datas[j * blocks + i].d);
-                for (int k = 0; k < QK8_0; k++) {
-                    sum += (quant_datas[j * blocks + i].qs[k] * floatvalue);
-                }
-            }
-        }
+    }
+
+    if (ggml_is_quantized(tensor->type)) {
+        std::vector<float> f32out(ggml_nelements(tensor));
+        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
+        qtype.to_float((void *)tensor->data, f32out.data(), f32out.size());
+        for (const float & value : f32out) {
+            sum += value;
         }
-#endif
     }
 
     return sum;
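Net effect of the second hunk: the F32 branch takes over the slot the Q8_0-only branch occupied, and every quantized type is now summed by dequantizing through ggml's type traits rather than hand-rolled Q8_0 block math, with std::vector replacing the malloc/free pair. A minimal standalone sketch of that pattern, assuming the ggml API exactly as the patch uses it (ggml_is_quantized, ggml_nelements, ggml_internal_get_type_traits and its to_float member); the name tensor_sum_quantized is ours for illustration:

#include <vector>

#include "ggml.h"

// Sum a quantized ggml tensor: convert the whole tensor to f32 once via the
// type's to_float trait, then accumulate in double for precision.
static double tensor_sum_quantized(const ggml_tensor * tensor) {
    double sum = 0.0;
    if (ggml_is_quantized(tensor->type)) {
        // One f32 slot per element, independent of the block layout.
        std::vector<float> f32out(ggml_nelements(tensor));
        // The traits table carries the dequantization routine for this type.
        ggml_type_traits_t qtype = ggml_internal_get_type_traits(tensor->type);
        qtype.to_float(tensor->data, f32out.data(), f32out.size());
        for (const float & value : f32out) {
            sum += value;
        }
    }
    return sum;
}

Because the traits lookup is keyed on tensor->type, the same branch covers Q8_0 and every other quantized format ggml registers, which is what made the local block_q8_0 definition removable in the first hunk.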