ggml : fix BLAS with unsupported types (#9775)

* ggml : do not use BLAS with types without to_float

* ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies

* ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits

it's not really internal if everybody uses it
This commit is contained in:
Diego Devesa 2024-10-08 14:21:43 +02:00 committed by GitHub
parent 458367a906
commit dca1d4b58a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 75 additions and 74 deletions

View file

@ -133,7 +133,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
std::vector<uint8_t> buf(ggml_nbytes(t));
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
const auto * tt = ggml_get_type_traits(t->type);
size_t bs = ggml_blck_size(t->type);
std::vector<float> vq(ggml_blck_size(t->type));
bool quantized = ggml_is_quantized(t->type);
@ -159,7 +159,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
} else if (t->type == GGML_TYPE_I8) {
tv.push_back((float)*(int8_t *) &buf[i]);
} else if (quantized) {
tt.to_float(&buf[i], vq.data(), bs);
tt->to_float(&buf[i], vq.data(), bs);
tv.insert(tv.end(), vq.begin(), vq.end());
} else {
GGML_ABORT("fatal error");