ggml : remove ggml_task_type and GGML_PERF (#8017)
* ggml : remove ggml_task_type and GGML_PERF
* check abort_callback on main thread only
* vulkan : remove usage of ggml_compute_params
* remove LLAMA_PERF
This commit is contained in:
parent
e112b610a1
commit
95f57bb5d5
8 changed files with 402 additions and 1082 deletions
43
sgemm.cpp
43
sgemm.cpp
|
@ -249,9 +249,8 @@ class tinyBLAS {
|
|||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n, int task) {
|
||||
if (task == GGML_TASK_TYPE_COMPUTE)
|
||||
mnpack(0, m, 0, n);
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -458,9 +457,8 @@ class tinyBLAS_Q0_ARM {
|
|||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n, int task) {
|
||||
if (task == GGML_TASK_TYPE_COMPUTE)
|
||||
mnpack(0, m, 0, n);
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -596,9 +594,8 @@ class tinyBLAS_Q0_AVX {
|
|||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n, int task) {
|
||||
if (task == GGML_TASK_TYPE_COMPUTE)
|
||||
mnpack(0, m, 0, n);
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -829,7 +826,7 @@ class tinyBLAS_Q0_AVX {
|
|||
* For example, for single-threaded single-precision GEMM you can say
|
||||
*
|
||||
* llamafile_sgemm(m, n, k, A, lda, B, ldb, C, ldc,
|
||||
* 0, 1, GGML_TASK_TYPE_COMPUTE,
|
||||
* 0, 1,
|
||||
* GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32);
|
||||
*
|
||||
* @param m is rows in `A` and `C`
|
||||
|
@ -843,14 +840,13 @@ class tinyBLAS_Q0_AVX {
|
|||
* @param ldc is row stride of `C`
|
||||
* @param ith is thread id (must be less than `nth`)
|
||||
* @param nth is number of threads (must be greater than zero)
|
||||
* @param task is GGML task type
|
||||
* @param Atype is GGML data type of `A`
|
||||
* @param Btype is GGML data type of `B`
|
||||
* @param Ctype is GGML data type of `C`
|
||||
* @return true if this function was able to service the matmul request
|
||||
*/
|
||||
bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda, const void *B, int64_t ldb, void *C,
|
||||
int64_t ldc, int ith, int nth, int task, int Atype, int Btype, int Ctype) {
|
||||
int64_t ldc, int ith, int nth, int Atype, int Btype, int Ctype) {
|
||||
|
||||
assert(m >= 0);
|
||||
assert(n >= 0);
|
||||
|
@ -877,7 +873,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__AVX__) || defined(__AVX2__)
|
||||
if (k % 8)
|
||||
|
@ -887,7 +883,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_NEON)
|
||||
if (n < 4)
|
||||
|
@ -899,7 +895,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -917,7 +913,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif (defined(__AVX__) || defined(__AVX2__)) && defined(__F16C__)
|
||||
if (k % 8)
|
||||
|
@ -929,7 +925,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
|
||||
if (n < 8)
|
||||
|
@ -943,7 +939,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const ggml_fp16_t *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||
if (k % 4)
|
||||
|
@ -955,7 +951,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -971,7 +967,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_FEATURE_DOTPROD)
|
||||
tinyBLAS_Q0_ARM<block_q8_0> tb{
|
||||
|
@ -979,7 +975,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -995,7 +991,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_FEATURE_DOTPROD)
|
||||
tinyBLAS_Q0_ARM<block_q4_0> tb{
|
||||
|
@ -1003,7 +999,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -1025,7 +1021,6 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(void)ldc;
|
||||
(void)ith;
|
||||
(void)nth;
|
||||
(void)task;
|
||||
(void)Atype;
|
||||
(void)Btype;
|
||||
(void)Ctype;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue