ggml : remove ggml_task_type and GGML_PERF
ggml-ci
This commit is contained in:
parent
9c77ec1d74
commit
d27f26ea0c
4 changed files with 308 additions and 910 deletions
35
ggml.h
35
ggml.h
|
@ -585,11 +585,7 @@ extern "C" {
|
|||
struct ggml_tensor * grad;
|
||||
struct ggml_tensor * src[GGML_MAX_SRC];
|
||||
|
||||
// performance
|
||||
int perf_runs;
|
||||
int64_t perf_cycles;
|
||||
int64_t perf_time_us;
|
||||
|
||||
// source tensor and offset for views
|
||||
struct ggml_tensor * view_src;
|
||||
size_t view_offs;
|
||||
|
||||
|
@ -599,7 +595,7 @@ extern "C" {
|
|||
|
||||
void * extra; // extra things e.g. for ggml-cuda.cu
|
||||
|
||||
char padding[8];
|
||||
// char padding[4];
|
||||
};
|
||||
|
||||
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
||||
|
@ -646,11 +642,6 @@ extern "C" {
|
|||
struct ggml_hash_set visited_hash_table;
|
||||
|
||||
enum ggml_cgraph_eval_order order;
|
||||
|
||||
// performance
|
||||
int perf_runs;
|
||||
int64_t perf_cycles;
|
||||
int64_t perf_time_us;
|
||||
};
|
||||
|
||||
// scratch buffer
|
||||
|
@ -667,28 +658,6 @@ extern "C" {
|
|||
bool no_alloc; // don't allocate memory for the tensor data
|
||||
};
|
||||
|
||||
|
||||
// compute types
|
||||
|
||||
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
||||
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
||||
enum ggml_task_type {
|
||||
GGML_TASK_TYPE_INIT = 0,
|
||||
GGML_TASK_TYPE_COMPUTE,
|
||||
GGML_TASK_TYPE_FINALIZE,
|
||||
};
|
||||
|
||||
struct ggml_compute_params {
|
||||
enum ggml_task_type type;
|
||||
|
||||
// ith = thread index, nth = number of threads
|
||||
int ith, nth;
|
||||
|
||||
// work buffer for all threads
|
||||
size_t wsize;
|
||||
void * wdata;
|
||||
};
|
||||
|
||||
// numa strategies
|
||||
enum ggml_numa_strategy {
|
||||
GGML_NUMA_STRATEGY_DISABLED = 0,
|
||||
|
|
37
sgemm.cpp
37
sgemm.cpp
|
@ -249,8 +249,7 @@ class tinyBLAS {
|
|||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n, int task) {
|
||||
if (task == GGML_TASK_TYPE_COMPUTE)
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
|
||||
|
@ -458,8 +457,7 @@ class tinyBLAS_Q0_ARM {
|
|||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n, int task) {
|
||||
if (task == GGML_TASK_TYPE_COMPUTE)
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
|
||||
|
@ -596,8 +594,7 @@ class tinyBLAS_Q0_AVX {
|
|||
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
|
||||
}
|
||||
|
||||
void matmul(int64_t m, int64_t n, int task) {
|
||||
if (task == GGML_TASK_TYPE_COMPUTE)
|
||||
void matmul(int64_t m, int64_t n) {
|
||||
mnpack(0, m, 0, n);
|
||||
}
|
||||
|
||||
|
@ -829,7 +826,7 @@ class tinyBLAS_Q0_AVX {
|
|||
* For example, for single-threaded single-precision GEMM you can say
|
||||
*
|
||||
* llamafile_sgemm(m, n, k, A, lda, B, ldb, C, ldc,
|
||||
* 0, 1, GGML_TASK_TYPE_COMPUTE,
|
||||
* 0, 1,
|
||||
* GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32);
|
||||
*
|
||||
* @param m is rows in `A` and `C`
|
||||
|
@ -843,14 +840,13 @@ class tinyBLAS_Q0_AVX {
|
|||
* @param ldc is row stride of `C`
|
||||
* @param ith is thread id (must be less than `nth`)
|
||||
* @param nth is number of threads (must be greater than zero)
|
||||
* @param task is GGML task type
|
||||
* @param Atype is GGML data type of `A`
|
||||
* @param Btype is GGML data type of `B`
|
||||
* @param Ctype is GGML data type of `C`
|
||||
* @return true if this function was able to service the matmul request
|
||||
*/
|
||||
bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda, const void *B, int64_t ldb, void *C,
|
||||
int64_t ldc, int ith, int nth, int task, int Atype, int Btype, int Ctype) {
|
||||
int64_t ldc, int ith, int nth, int Atype, int Btype, int Ctype) {
|
||||
|
||||
assert(m >= 0);
|
||||
assert(n >= 0);
|
||||
|
@ -877,7 +873,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__AVX__) || defined(__AVX2__)
|
||||
if (k % 8)
|
||||
|
@ -887,7 +883,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_NEON)
|
||||
if (n < 4)
|
||||
|
@ -899,7 +895,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -917,7 +913,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif (defined(__AVX__) || defined(__AVX2__)) && defined(__F16C__)
|
||||
if (k % 8)
|
||||
|
@ -929,7 +925,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && !defined(_MSC_VER)
|
||||
if (n < 8)
|
||||
|
@ -943,7 +939,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const ggml_fp16_t *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
|
||||
if (k % 4)
|
||||
|
@ -955,7 +951,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const float *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -971,7 +967,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_FEATURE_DOTPROD)
|
||||
tinyBLAS_Q0_ARM<block_q8_0> tb{
|
||||
|
@ -979,7 +975,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -995,7 +991,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#elif defined(__ARM_FEATURE_DOTPROD)
|
||||
tinyBLAS_Q0_ARM<block_q4_0> tb{
|
||||
|
@ -1003,7 +999,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(const block_q8_0 *)B, ldb,
|
||||
(float *)C, ldc,
|
||||
ith, nth};
|
||||
tb.matmul(m, n, task);
|
||||
tb.matmul(m, n);
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
|
@ -1025,7 +1021,6 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
|
|||
(void)ldc;
|
||||
(void)ith;
|
||||
(void)nth;
|
||||
(void)task;
|
||||
(void)Atype;
|
||||
(void)Btype;
|
||||
(void)Ctype;
|
||||
|
|
2
sgemm.h
2
sgemm.h
|
@ -7,7 +7,7 @@ extern "C" {
|
|||
|
||||
bool llamafile_sgemm(int64_t, int64_t, int64_t, const void *, int64_t,
|
||||
const void *, int64_t, void *, int64_t, int, int,
|
||||
int, int, int, int);
|
||||
int, int, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue