ggml : build backends as libraries (#10256)
* ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com>
This commit is contained in:
parent
4a8ccb37ad
commit
ae8de6d50a
191 changed files with 17541 additions and 16879 deletions
|
@ -237,7 +237,6 @@ int main(int argc, char** argv) {
|
|||
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
|
||||
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
|
||||
|
||||
const auto * funcs = ggml_get_type_traits(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
|
||||
const auto * funcs_cpu = ggml_get_type_traits_cpu(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
|
||||
|
||||
std::vector<block_q4_0> q40;
|
||||
|
@ -263,9 +262,9 @@ int main(int argc, char** argv) {
|
|||
// Note, we do not include this in the timing as in practical application
|
||||
// we already have the quantized model weights.
|
||||
if (useQ4_1) {
|
||||
funcs->from_float(x1.data(), q41.data(), kVecSize);
|
||||
funcs_cpu->from_float(x1.data(), q41.data(), kVecSize);
|
||||
} else {
|
||||
funcs->from_float(x1.data(), q40.data(), kVecSize);
|
||||
funcs_cpu->from_float(x1.data(), q40.data(), kVecSize);
|
||||
}
|
||||
|
||||
// Now measure time the dot product needs using the "scalar" version above
|
||||
|
@ -284,7 +283,7 @@ int main(int argc, char** argv) {
|
|||
dot_q4_q8(kVecSize, &result, q40.data(), q8.data());
|
||||
}
|
||||
else {
|
||||
const auto * vdot = ggml_get_type_traits(funcs_cpu->vec_dot_type);
|
||||
const auto * vdot = ggml_get_type_traits_cpu(funcs_cpu->vec_dot_type);
|
||||
vdot->from_float(y1.data(), q8.data(), kVecSize);
|
||||
if (useQ4_1) funcs_cpu->vec_dot(kVecSize, &result, 0, q41.data(), 0, q8.data(), 0, 1);
|
||||
else funcs_cpu->vec_dot(kVecSize, &result, 0, q40.data(), 0, q8.data(), 0, 1);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue