revert library back first

This commit is contained in:
Concedo 2023-05-16 15:45:05 +08:00
parent e4e6994353
commit 554340f565

91
ggml.c
View file

@ -9327,7 +9327,7 @@ static void ggml_compute_forward_rms_norm_back(
// ggml_compute_forward_mul_mat // ggml_compute_forward_mul_mat
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
// helper function to determine if it is better to use BLAS or not // helper function to determine if it is better to use BLAS or not
// for large matrices, BLAS is faster // for large matrices, BLAS is faster
static bool ggml_compute_forward_mul_mat_use_blas( static bool ggml_compute_forward_mul_mat_use_blas(
@ -9368,7 +9368,7 @@ static void ggml_compute_forward_mul_mat_f32(
const int64_t ne02 = src0->ne[2]; const int64_t ne02 = src0->ne[2];
const int64_t ne03 = src0->ne[3]; const int64_t ne03 = src0->ne[3];
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
const int64_t ne10 = src1->ne[0]; const int64_t ne10 = src1->ne[0];
#endif #endif
const int64_t ne11 = src1->ne[1]; const int64_t ne11 = src1->ne[1];
@ -9441,7 +9441,7 @@ static void ggml_compute_forward_mul_mat_f32(
} }
#endif #endif
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)|| defined(GGML_USE_CLBLAST)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
if (params->ith != 0) { if (params->ith != 0) {
return; return;
@ -9461,11 +9461,33 @@ static void ggml_compute_forward_mul_mat_f32(
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
#if defined(GGML_USE_CLBLAST)
// zT = y * xT
if(quants_unshuffled)
{
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01,
GGML_TYPE_F32);
}
else
{
ggml_cl_sgemm_wrapper_legacy(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01,
GGML_TYPE_F32);
}
#else
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10, ne11, ne01, ne10,
1.0f, y, ne10, 1.0f, y, ne10,
x, ne00, x, ne00,
0.0f, d, ne01); 0.0f, d, ne01);
#endif
} }
} }
//printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3); //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
@ -9613,7 +9635,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
} }
#endif #endif
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)|| defined(GGML_USE_CLBLAST)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
GGML_ASSERT(nb10 == sizeof(float)); GGML_ASSERT(nb10 == sizeof(float));
@ -9643,6 +9665,32 @@ static void ggml_compute_forward_mul_mat_f16_f32(
assert(id*sizeof(float) <= params->wsize); assert(id*sizeof(float) <= params->wsize);
} }
#if defined(GGML_USE_CLBLAST)
const float * x = wdata;
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
// zT = y * xT
if(quants_unshuffled)
{
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01,
GGML_TYPE_F32);
}
else
{
ggml_cl_sgemm_wrapper_legacy(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01,
GGML_TYPE_F32);
}
#else
const float * x = wdata; const float * x = wdata;
const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
@ -9654,6 +9702,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
1.0f, y, ne10, 1.0f, y, ne10,
x, ne00, x, ne00,
0.0f, d, ne01); 0.0f, d, ne01);
#endif
} }
} }
@ -9825,7 +9874,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
} }
#endif #endif
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)|| defined(GGML_USE_CLBLAST)
if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
if (params->ith != 0) { if (params->ith != 0) {
return; return;
@ -9848,6 +9897,9 @@ static void ggml_compute_forward_mul_mat_q_f32(
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
#if defined(GGML_USE_CLBLAST)
const void* x = (char *) src0->data + i03*nb03 + i02*nb02;
#else
{ {
size_t id = 0; size_t id = 0;
for (int64_t i01 = 0; i01 < ne01; ++i01) { for (int64_t i01 = 0; i01 < ne01; ++i01) {
@ -9859,12 +9911,35 @@ static void ggml_compute_forward_mul_mat_q_f32(
} }
const float * x = wdata; const float * x = wdata;
#endif
#if defined(GGML_USE_CLBLAST)
// zT = y * xT
if(quants_unshuffled)
{
ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01,
type);
}
else
{
ggml_cl_sgemm_wrapper_legacy(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T,
ne11, ne01, ne10,
1.0f, y, ne10,
x, ne10,
0.0f, d, ne01,
type);
}
#else
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
ne11, ne01, ne10, ne11, ne01, ne10,
1.0f, y, ne10, 1.0f, y, ne10,
x, ne00, x, ne00,
0.0f, d, ne01); 0.0f, d, ne01);
#endif
} }
} }
@ -13979,7 +14054,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
else else
#endif #endif
if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) { if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) {
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)|| defined(GGML_USE_CLBLAST)
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
node->n_tasks = 1; // TODO: this actually is doing nothing node->n_tasks = 1; // TODO: this actually is doing nothing
// the threads are still spinning // the threads are still spinning
@ -13993,13 +14068,13 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
#endif #endif
} else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) { } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
cur = 0; cur = 0;
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
node->n_tasks = 1; node->n_tasks = 1;
} }
#endif #endif
} else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) { } else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) {
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
node->n_tasks = 1; node->n_tasks = 1;
cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]); cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);