ggml : fix BLAS with unsupported types (#9775)
* ggml : do not use BLAS with types without to_float * ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies * ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits it's not really internal if everybody uses it
This commit is contained in:
parent
458367a906
commit
dca1d4b58a
13 changed files with 75 additions and 74 deletions
|
@ -133,7 +133,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||
std::vector<uint8_t> buf(ggml_nbytes(t));
|
||||
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
||||
|
||||
ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
|
||||
const auto * tt = ggml_get_type_traits(t->type);
|
||||
size_t bs = ggml_blck_size(t->type);
|
||||
std::vector<float> vq(ggml_blck_size(t->type));
|
||||
bool quantized = ggml_is_quantized(t->type);
|
||||
|
@ -159,7 +159,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||
} else if (t->type == GGML_TYPE_I8) {
|
||||
tv.push_back((float)*(int8_t *) &buf[i]);
|
||||
} else if (quantized) {
|
||||
tt.to_float(&buf[i], vq.data(), bs);
|
||||
tt->to_float(&buf[i], vq.data(), bs);
|
||||
tv.insert(tv.end(), vq.begin(), vq.end());
|
||||
} else {
|
||||
GGML_ABORT("fatal error");
|
||||
|
|
|
@ -44,26 +44,26 @@ static float array_rmse(const float * a1, const float * a2, size_t n) {
|
|||
}
|
||||
|
||||
// Total quantization error on test data
|
||||
static float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
|
||||
static float total_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
|
||||
std::vector<uint8_t> tmp_q(2*test_size);
|
||||
std::vector<float> tmp_out(test_size);
|
||||
|
||||
qfns.from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns.to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
qfns->from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
return array_rmse(test_data, tmp_out.data(), test_size);
|
||||
}
|
||||
|
||||
// Total quantization error on test data
|
||||
static float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
|
||||
static float reference_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
|
||||
std::vector<uint8_t> tmp_q(2*test_size);
|
||||
std::vector<float> tmp_out(test_size);
|
||||
std::vector<float> tmp_out_ref(test_size);
|
||||
|
||||
qfns.from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns.to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
qfns->from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
|
||||
qfns.from_float_ref(test_data, tmp_q.data(), test_size);
|
||||
qfns.to_float(tmp_q.data(), tmp_out_ref.data(), test_size);
|
||||
qfns->from_float_ref(test_data, tmp_q.data(), test_size);
|
||||
qfns->to_float(tmp_q.data(), tmp_out_ref.data(), test_size);
|
||||
|
||||
return array_rmse(tmp_out.data(), tmp_out_ref.data(), test_size);
|
||||
}
|
||||
|
@ -78,18 +78,18 @@ static float dot_product(const float * a1, const float * a2, size_t test_size) {
|
|||
|
||||
// Total dot product error
|
||||
static float dot_product_error(
|
||||
ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2
|
||||
const ggml_type_traits * qfns, size_t test_size, const float * test_data1, const float *test_data2
|
||||
) {
|
||||
std::vector<uint8_t> tmp_q1(2*test_size);
|
||||
std::vector<uint8_t> tmp_q2(2*test_size);
|
||||
|
||||
auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
|
||||
const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type);
|
||||
|
||||
qfns.from_float(test_data1, tmp_q1.data(), test_size);
|
||||
vdot.from_float(test_data2, tmp_q2.data(), test_size);
|
||||
qfns->from_float(test_data1, tmp_q1.data(), test_size);
|
||||
vdot->from_float(test_data2, tmp_q2.data(), test_size);
|
||||
|
||||
float result = INFINITY;
|
||||
qfns.vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, 1);
|
||||
qfns->vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, 1);
|
||||
|
||||
const float dot_ref = dot_product(test_data1, test_data2, test_size);
|
||||
|
||||
|
@ -131,10 +131,10 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
ggml_type type = (ggml_type) i;
|
||||
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
|
||||
const auto * qfns = ggml_get_type_traits(type);
|
||||
|
||||
// deprecated - skip
|
||||
if (qfns.blck_size == 0) {
|
||||
if (qfns->blck_size == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -143,7 +143,7 @@ int main(int argc, char * argv[]) {
|
|||
printf("Testing %s\n", ggml_type_name((ggml_type) i));
|
||||
ggml_quantize_init(ei);
|
||||
|
||||
if (qfns.from_float && qfns.to_float) {
|
||||
if (qfns->from_float && qfns->to_float) {
|
||||
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
||||
const float max_quantization_error =
|
||||
type == GGML_TYPE_TQ1_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
|
||||
|
|
|
@ -122,9 +122,9 @@ static void usage(char * argv[]) {
|
|||
printf(" --type TYPE set test type as");
|
||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
ggml_type type = (ggml_type) i;
|
||||
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
|
||||
const auto * qfns = ggml_get_type_traits(type);
|
||||
if (ggml_type_name(type) != NULL) {
|
||||
if (qfns.from_float && qfns.to_float) {
|
||||
if (qfns->from_float && qfns->to_float) {
|
||||
printf(" %s", ggml_type_name(type));
|
||||
}
|
||||
}
|
||||
|
@ -270,12 +270,12 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
ggml_type type = (ggml_type) i;
|
||||
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
|
||||
const auto * qfns = ggml_get_type_traits(type);
|
||||
if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (qfns.from_float && qfns.to_float) {
|
||||
if (qfns->from_float && qfns->to_float) {
|
||||
printf("%s\n", ggml_type_name(type));
|
||||
|
||||
ggml_quantize_init(type);
|
||||
|
@ -285,7 +285,7 @@ int main(int argc, char * argv[]) {
|
|||
for (size_t size : params.test_sizes) {
|
||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||
auto quantize_fn = [&](void) -> float {
|
||||
qfns.from_float_ref(test_data1, test_q1, size);
|
||||
qfns->from_float_ref(test_data1, test_q1, size);
|
||||
return test_q1[0];
|
||||
};
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
|
@ -299,7 +299,7 @@ int main(int argc, char * argv[]) {
|
|||
for (size_t size : params.test_sizes) {
|
||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||
auto quantize_fn = [&](void) -> float {
|
||||
qfns.from_float(test_data1, test_q1, size);
|
||||
qfns->from_float(test_data1, test_q1, size);
|
||||
return test_q1[0];
|
||||
};
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
|
@ -310,11 +310,11 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
if (params.op_dequantize_row_q) {
|
||||
printf(" dequantize_row_q\n");
|
||||
qfns.from_float(test_data1, test_q1, largest);
|
||||
qfns->from_float(test_data1, test_q1, largest);
|
||||
for (size_t size : params.test_sizes) {
|
||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||
auto quantize_fn = [&](void) -> float {
|
||||
qfns.to_float(test_q1, test_out, size);
|
||||
qfns->to_float(test_q1, test_out, size);
|
||||
return test_out[0];
|
||||
};
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
|
@ -328,8 +328,8 @@ int main(int argc, char * argv[]) {
|
|||
for (size_t size : params.test_sizes) {
|
||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||
auto quantize_fn = [&](void) -> float {
|
||||
auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
|
||||
vdot.from_float(test_data1, test_q1, size);
|
||||
const auto * vdot = ggml_get_type_traits(qfns->vec_dot_type);
|
||||
vdot->from_float(test_data1, test_q1, size);
|
||||
return test_q1[0];
|
||||
};
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
|
@ -340,13 +340,13 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
if (params.op_vec_dot_q) {
|
||||
printf(" vec_dot_q\n");
|
||||
qfns.from_float(test_data1, test_q1, largest);
|
||||
qfns.from_float(test_data2, test_q2, largest);
|
||||
qfns->from_float(test_data1, test_q1, largest);
|
||||
qfns->from_float(test_data2, test_q2, largest);
|
||||
for (size_t size : params.test_sizes) {
|
||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||
auto quantize_fn = [&](void) -> float {
|
||||
float result;
|
||||
qfns.vec_dot(size, &result, 0, test_q1, 0, test_q2, 0, 1);
|
||||
qfns->vec_dot(size, &result, 0, test_q1, 0, test_q2, 0, 1);
|
||||
return result;
|
||||
};
|
||||
size_t quantized_size = ggml_row_size(type, size);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue