ggml : fix BLAS with unsupported types (#9775)
* ggml : do not use BLAS with types without to_float
* ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies
* ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits
  (it's not really internal if everybody uses it)
This commit is contained in:
parent 458367a906
commit dca1d4b58a
13 changed files with 75 additions and 74 deletions
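The first bullet of the commit message (skipping BLAS for types that cannot be converted to float) is not visible in the hunks below, which only show the llama.cpp call site. As a rough C++ illustration of the idea: a type is eligible for the dequantize-to-F32-then-sgemm path only if it is already F32 or its traits expose a to_float conversion. The helper name is hypothetical, not the patch's actual code.

#include "ggml.h"

// Sketch only: decide whether a source type can go through the BLAS path,
// i.e. whether it can be dequantized to F32 first. F32 needs no conversion;
// everything else must provide to_float in its type traits.
static bool type_usable_with_blas(enum ggml_type type) {
    const ggml_type_traits * traits = ggml_get_type_traits(type);
    return type == GGML_TYPE_F32 || traits->to_float != NULL;
}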
@@ -17872,10 +17872,9 @@ static void llama_tensor_dequantize_internal(
     }
     float * f32_output = (float *) output.data();
 
-    ggml_type_traits_t qtype;
+    const ggml_type_traits * qtype = ggml_get_type_traits(tensor->type);
     if (ggml_is_quantized(tensor->type)) {
-        qtype = ggml_internal_get_type_traits(tensor->type);
-        if (qtype.to_float == NULL) {
+        if (qtype->to_float == NULL) {
             throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type)));
         }
     } else if (tensor->type != GGML_TYPE_F16 &&
@@ -17889,7 +17888,7 @@ static void llama_tensor_dequantize_internal(
         } else if (tensor->type == GGML_TYPE_BF16) {
             ggml_bf16_to_fp32_row((ggml_bf16_t *)tensor->data, f32_output, nelements);
         } else if (ggml_is_quantized(tensor->type)) {
-            qtype.to_float(tensor->data, f32_output, nelements);
+            qtype->to_float(tensor->data, f32_output, nelements);
         } else {
             GGML_ABORT("fatal error"); // unreachable
         }
@@ -17925,7 +17924,7 @@ static void llama_tensor_dequantize_internal(
             } else if (typ == GGML_TYPE_BF16) {
                 ggml_bf16_to_fp32_row((ggml_bf16_t *)inbuf, outbuf, nels);
             } else {
-                qtype.to_float(inbuf, outbuf, nels);
+                qtype->to_float(inbuf, outbuf, nels);
             }
         };
         workers.emplace_back(compute, tensor->type, (uint8_t *) tensor->data + in_buff_offs, f32_output + out_buff_offs, thr_elems);
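At the API level, the second and third bullets mean downstream code using the old traits accessor needs a small migration: the function is now public (no "internal" prefix) and returns a const pointer rather than a struct copy. A hedged before/after sketch of the call-site shape; the exact declarations are approximations, not quoted from the patch:

// Before (approximate): traits struct returned by value from the "internal" accessor.
//   ggml_type_traits_t traits = ggml_internal_get_type_traits(type);
//   if (traits.to_float) traits.to_float(src, dst, n);

// After (approximate): public accessor returning a const pointer, avoiding a copy.
const ggml_type_traits * traits = ggml_get_type_traits(type);
if (traits->to_float != NULL) {
    traits->to_float(src, dst, n); // dequantize n elements from src into the float buffer dst
}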