ggml : more perfo with llamafile tinyblas on x86_64 (#10714)
* more perfo with llamafile tinyblas on x86_64. - add bf16 suport - change dispache strategie (thanks: https://github.com/ikawrakow/ik_llama.cpp/pull/71 ) - reduce memory bandwidth simple tinyblas dispache and more cache freindly * tinyblas dynamic dispaching * sgemm: add M blocs. * - git 2.47 use short id of len 9. - show-progress is not part of GNU Wget2 * remove not stable test
This commit is contained in:
parent
09fe2e7613
commit
2cd43f4900
6 changed files with 287 additions and 278 deletions
|
@ -7419,14 +7419,14 @@ static void ggml_compute_forward_mul_mat(
|
|||
if (src1_cont) {
|
||||
for (int64_t i13 = 0; i13 < ne13; i13++)
|
||||
for (int64_t i12 = 0; i12 < ne12; i12++)
|
||||
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
|
||||
if (!llamafile_sgemm(params,
|
||||
ne01, ne11, ne00/ggml_blck_size(src0->type),
|
||||
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
|
||||
nb01/ggml_type_size(src0->type),
|
||||
(const char *)src1->data + i12*nb12 + i13*nb13,
|
||||
nb11/ggml_type_size(src1->type),
|
||||
(char *)dst->data + i12*nb2 + i13*nb3,
|
||||
nb1/ggml_type_size(dst->type),
|
||||
ith, nth,
|
||||
src0->type,
|
||||
src1->type,
|
||||
dst->type))
|
||||
|
@ -7471,14 +7471,14 @@ UseGgmlGemm1:;
|
|||
|
||||
for (int64_t i13 = 0; i13 < ne13; i13++)
|
||||
for (int64_t i12 = 0; i12 < ne12; i12++)
|
||||
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
|
||||
if (!llamafile_sgemm(params,
|
||||
ne01, ne11, ne00/ggml_blck_size(src0->type),
|
||||
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
|
||||
nb01/ggml_type_size(src0->type),
|
||||
(const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size,
|
||||
row_size/ggml_type_size(vec_dot_type),
|
||||
(char *)dst->data + i12*nb2 + i13*nb3,
|
||||
nb1/ggml_type_size(dst->type),
|
||||
ith, nth,
|
||||
src0->type,
|
||||
vec_dot_type,
|
||||
dst->type))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue