ggml : remove ggml_task_type and GGML_PERF (#8017)

* ggml : remove ggml_task_type and GGML_PERF

* check abort_callback on main thread only

* vulkan : remove usage of ggml_compute_params

* remove LLAMA_PERF
Author: slaren, 2024-06-24 03:07:59 +02:00 (committed by GitHub)
parent e112b610a1
commit 95f57bb5d5
8 changed files with 402 additions and 1082 deletions
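
The "check abort_callback on main thread only" item refers to ggml's cooperative cancellation hook: the callback returns true to stop graph computation, and after this change only the main thread polls it rather than every worker. A minimal sketch of installing one on the CPU backend, assuming the ggml_abort_callback typedef and ggml_backend_cpu_set_abort_callback from the ggml headers of this era (verify against your revision):

    #include <atomic>
    #include "ggml-backend.h"

    static std::atomic<bool> g_stop{false};

    // Polled during graph computation; returning true aborts the run.
    static bool my_abort_cb(void * /*user_data*/) {
        return g_stop.load(std::memory_order_relaxed);
    }

    // Wiring (assumed API):
    //   ggml_backend_t cpu = ggml_backend_cpu_init();
    //   ggml_backend_cpu_set_abort_callback(cpu, my_abort_cb, nullptr);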

sgemm.cpp

@@ -249,9 +249,8 @@ class tinyBLAS {
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
     }
 
-    void matmul(int64_t m, int64_t n, int task) {
-        if (task == GGML_TASK_TYPE_COMPUTE)
-            mnpack(0, m, 0, n);
+    void matmul(int64_t m, int64_t n) {
+        mnpack(0, m, 0, n);
     }
 
   private:
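
For orientation, the ith/nth pair captured in the constructor above is how these kernels divide work: all nth threads call matmul concurrently and each owns an interleaved slice of the output, so removing the task gate does not change the threading model. A stand-alone sketch of that static partitioning (simplified; the real mnpack tiles both dimensions and honors the lda/ldb/ldc strides, and the layout here is illustrative):

    #include <cstdint>

    // Sketch: thread ith of nth computes every nth-th row of C, so the
    // threads write disjoint elements and need no locking.
    void matmul_slice(int64_t m, int64_t n, int64_t k,
                      const float *A, const float *B, float *C,
                      int ith, int nth) {
        for (int64_t i = ith; i < m; i += nth) {   // rows owned by this thread
            for (int64_t j = 0; j < n; ++j) {
                float sum = 0.0f;
                for (int64_t l = 0; l < k; ++l)
                    sum += A[i*k + l] * B[j*k + l];
                C[j*m + i] = sum;                  // no overlap between threads
            }
        }
    }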
@@ -458,9 +457,8 @@ class tinyBLAS_Q0_ARM {
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
     }
 
-    void matmul(int64_t m, int64_t n, int task) {
-        if (task == GGML_TASK_TYPE_COMPUTE)
-            mnpack(0, m, 0, n);
+    void matmul(int64_t m, int64_t n) {
+        mnpack(0, m, 0, n);
     }
 
   private:
@@ -596,9 +594,8 @@ class tinyBLAS_Q0_AVX {
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
     }
 
-    void matmul(int64_t m, int64_t n, int task) {
-        if (task == GGML_TASK_TYPE_COMPUTE)
-            mnpack(0, m, 0, n);
+    void matmul(int64_t m, int64_t n) {
+        mnpack(0, m, 0, n);
    }
 
   private:
@@ -829,7 +826,7 @@ class tinyBLAS_Q0_AVX {
  * For example, for single-threaded single-precision GEMM you can say
  *
  *     llamafile_sgemm(m, n, k, A, lda, B, ldb, C, ldc,
- *                     0, 1, GGML_TASK_TYPE_COMPUTE,
+ *                     0, 1,
  *                     GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32);
  *
  * @param m is rows in `A` and `C`
@@ -843,14 +840,13 @@ class tinyBLAS_Q0_AVX {
  * @param ldc is row stride of `C`
  * @param ith is thread id (must be less than `nth`)
  * @param nth is number of threads (must be greater than zero)
- * @param task is GGML task type
  * @param Atype is GGML data type of `A`
  * @param Btype is GGML data type of `B`
  * @param Ctype is GGML data type of `C`
  * @return true if this function was able to service the matmul request
  */
 bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda, const void *B, int64_t ldb, void *C,
-                     int64_t ldc, int ith, int nth, int task, int Atype, int Btype, int Ctype) {
+                     int64_t ldc, int ith, int nth, int Atype, int Btype, int Ctype) {
 
     assert(m >= 0);
     assert(n >= 0);
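
A hedged usage sketch of the updated signature: spawn nth workers, give each its own ith, and let the kernel partition the output internally, so only the final join is needed. The thread handling is illustrative; only llamafile_sgemm, its documented ith/nth contract, and the GGML_TYPE_* constants come from the file (their declarations are assumed in scope):

    #include <cstdint>
    #include <thread>
    #include <vector>

    void sgemm_threaded(int64_t m, int64_t n, int64_t k,
                        const float *A, int64_t lda,
                        const float *B, int64_t ldb,
                        float *C, int64_t ldc, int nth) {
        std::vector<std::thread> workers;
        for (int ith = 0; ith < nth; ++ith)
            workers.emplace_back([=] {
                // returns false if the shape/types are unsupported;
                // every thread sees the same answer
                llamafile_sgemm(m, n, k, A, lda, B, ldb, C, ldc, ith, nth,
                                GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32);
            });
        for (auto &t : workers) t.join();
    }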
@@ -877,7 +873,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const float *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif defined(__AVX__) || defined(__AVX2__)
         if (k % 8)
@@ -887,7 +883,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const float *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif defined(__ARM_NEON)
         if (n < 4)
@@ -899,7 +895,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const float *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #else
         return false;
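
The pattern repeated across these hunks is compile-time dispatch: each input-type case tries the widest SIMD kernel the build enables, guards its shape requirements, and returns false when nothing fits. In outline (a sketch of the structure, not the file's literal code; the AVX512 guard is assumed from the surrounding file):

    #include <cstdint>

    // Skeleton of one per-type case in llamafile_sgemm (sketch): select
    // one kernel at compile time, or report "unsupported" so the caller
    // can take a generic path.
    static bool f32_case_outline(int64_t k, int64_t n) {
    #if defined(__AVX512F__)
        (void)n;
        if (k % 16) return false;  // 16-wide kernel needs k % 16 == 0
    #elif defined(__AVX__) || defined(__AVX2__)
        (void)n;
        if (k % 8) return false;   // 8-wide kernel needs k % 8 == 0
    #elif defined(__ARM_NEON)
        (void)k;
        if (n < 4) return false;   // NEON kernel wants at least 4 columns
    #else
        (void)k; (void)n;
        return false;              // no SIMD path compiled in
    #endif
        return true;               // the selected tinyBLAS kernel runs here
    }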
@@ -917,7 +913,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const float *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif (defined(__AVX__) || defined(__AVX2__)) && defined(__F16C__)
         if (k % 8)
@@ -929,7 +925,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const float *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
         if (n < 8)
@@ -943,7 +939,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const ggml_fp16_t *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif defined(__ARM_NEON) && !defined(_MSC_VER)
         if (k % 4)
@@ -955,7 +951,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const float *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #else
         return false;
@@ -971,7 +967,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const block_q8_0 *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif defined(__ARM_FEATURE_DOTPROD)
         tinyBLAS_Q0_ARM<block_q8_0> tb{
@@ -979,7 +975,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const block_q8_0 *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #else
         return false;
@@ -995,7 +991,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const block_q8_0 *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #elif defined(__ARM_FEATURE_DOTPROD)
         tinyBLAS_Q0_ARM<block_q4_0> tb{
@@ -1003,7 +999,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
             (const block_q8_0 *)B, ldb,
             (float *)C, ldc,
             ith, nth};
-        tb.matmul(m, n, task);
+        tb.matmul(m, n);
         return true;
 #else
         return false;
@@ -1025,7 +1021,6 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     (void)ldc;
     (void)ith;
     (void)nth;
-    (void)task;
     (void)Atype;
     (void)Btype;
     (void)Ctype;
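
Those (void) casts sit in the unsupported-type fallback, which returns false without touching C; callers are expected to branch on the return value. A hypothetical caller sketch (generic_mul_mat is an illustrative stand-in, not a real ggml symbol):

    // Try the SIMD fast path first; on false, nothing was written to C,
    // so fall back to a reference implementation.
    if (!llamafile_sgemm(m, n, k, A, lda, B, ldb, C, ldc, ith, nth,
                         Atype, Btype, Ctype)) {
        generic_mul_mat(m, n, k, A, lda, B, ldb, C, ldc);  // hypothetical fallback
    }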