ggml : fix BLAS with unsupported types (#9775)
* ggml : do not use BLAS with types without to_float * ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies * ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits it's not really internal if everybody uses it
This commit is contained in:
parent
458367a906
commit
dca1d4b58a
13 changed files with 75 additions and 74 deletions
|
@ -136,7 +136,7 @@ int main(int argc, char** argv) {
|
|||
|
||||
auto ggml_type = type == 0 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q4_1;
|
||||
|
||||
auto funcs = ggml_internal_get_type_traits(ggml_type);
|
||||
const auto * funcs = ggml_get_type_traits(ggml_type);
|
||||
|
||||
Stat simple, ggml;
|
||||
|
||||
|
@ -156,8 +156,8 @@ int main(int argc, char** argv) {
|
|||
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
float fs;
|
||||
if (type == 0) funcs.vec_dot(kVecSize * QK4_1, &fs, 0, x40.data(), 0, y.data(), 0, 1);
|
||||
else funcs.vec_dot(kVecSize * QK4_1, &fs, 0, x41.data(), 0, y.data(), 0, 1);
|
||||
if (type == 0) funcs->vec_dot(kVecSize * QK4_1, &fs, 0, x40.data(), 0, y.data(), 0, 1);
|
||||
else funcs->vec_dot(kVecSize * QK4_1, &fs, 0, x41.data(), 0, y.data(), 0, 1);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
t = 1e-3*std::chrono::duration_cast<std::chrono::nanoseconds>(t2-t1).count();
|
||||
if (iloop > 3) ggml.addResult(fs, t);
|
||||
|
|
|
@ -236,7 +236,7 @@ int main(int argc, char** argv) {
|
|||
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
|
||||
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
|
||||
|
||||
auto funcs = useQ4_1 ? ggml_internal_get_type_traits(GGML_TYPE_Q4_1) : ggml_internal_get_type_traits(GGML_TYPE_Q4_0);
|
||||
const auto * funcs = useQ4_1 ? ggml_get_type_traits(GGML_TYPE_Q4_1) : ggml_get_type_traits(GGML_TYPE_Q4_0);
|
||||
|
||||
std::vector<block_q4_0> q40;
|
||||
std::vector<block_q4_1> q41;
|
||||
|
@ -261,9 +261,9 @@ int main(int argc, char** argv) {
|
|||
// Note, we do not include this in the timing as in practical application
|
||||
// we already have the quantized model weights.
|
||||
if (useQ4_1) {
|
||||
funcs.from_float(x1.data(), q41.data(), kVecSize);
|
||||
funcs->from_float(x1.data(), q41.data(), kVecSize);
|
||||
} else {
|
||||
funcs.from_float(x1.data(), q40.data(), kVecSize);
|
||||
funcs->from_float(x1.data(), q40.data(), kVecSize);
|
||||
}
|
||||
|
||||
// Now measure time the dot product needs using the "scalar" version above
|
||||
|
@ -282,10 +282,10 @@ int main(int argc, char** argv) {
|
|||
dot_q4_q8(kVecSize, &result, q40.data(), q8.data());
|
||||
}
|
||||
else {
|
||||
auto vdot = ggml_internal_get_type_traits(funcs.vec_dot_type);
|
||||
vdot.from_float(y1.data(), q8.data(), kVecSize);
|
||||
if (useQ4_1) funcs.vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
|
||||
else funcs.vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
|
||||
const auto * vdot = ggml_get_type_traits(funcs->vec_dot_type);
|
||||
vdot->from_float(y1.data(), q8.data(), kVecSize);
|
||||
if (useQ4_1) funcs->vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
|
||||
else funcs->vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
|
||||
}
|
||||
sumq += result;
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue