add OpenCL op functions and calls

This commit is contained in:
ShuyRoy 2024-01-28 16:36:58 +08:00
parent 0bd702984c
commit 3201a6df2c
2 changed files with 354 additions and 3 deletions

View file

@ -1338,6 +1338,8 @@ void ggml_cl_free_data(const struct ggml_tensor* tensor) {
clReleaseMemObject(mem); clReleaseMemObject(mem);
} }
// Copy data from the host side (the CPU and its memory) to an OpenCL buffer on the device side (typically the GPU and its memory)
static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t offset, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, cl_event* ev) { static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t offset, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, cl_event* ev) {
cl_int err; cl_int err;
const uint64_t ne0 = src->ne[0]; const uint64_t ne0 = src->ne[0];
@ -1867,6 +1869,254 @@ size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct g
return 0; return 0;
} }
// xr
// OpenCL stub for the "dup" op when src0 and dst are both contiguous and share
// a type. Fixed: return type was misspelled `viod`, a compile error.
// NOTE(review): currently a no-op; the CLBLAST call site in ggml.c returns
// immediately after calling this, so dup copies nothing until implemented.
static void ggml_compute_cl_dup_same_cont(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst){
    (void) params; (void) src0; (void) dst; // unimplemented stub
}
// OpenCL stub for the F16 "dup" path. Fixed: return type was misspelled
// `viod`, a compile error.
// NOTE(review): no-op; the CLBLAST call site returns right after this, so the
// F16 dup produces no output on the CL path until implemented.
static void ggml_compute_cl_dup_f16(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst){
    (void) params; (void) src0; (void) dst; // unimplemented stub
}
// OpenCL stub for the F32 "dup" path. Fixed: return type was misspelled
// `viod`, a compile error.
// NOTE(review): no-op; the CLBLAST call site returns right after this, so the
// F32 dup produces no output on the CL path until implemented.
static void ggml_compute_cl_dup_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst){
    (void) params; (void) src0; (void) dst; // unimplemented stub
}
// xr
// OpenCL stub for soft-max (CLBLAST replacement for
// ggml_compute_forward_soft_max_f32). Currently a no-op: the caller returns
// immediately after this, so soft-max computes nothing on the CL path.
// TODO(review): implement, or confirm this is an intentional skeleton.
static void ggml_cl_softmax(const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
// wh
// OpenCL stub for F32 abs (CLBLAST replacement for
// ggml_compute_forward_abs_f32). No-op until implemented; caller returns
// immediately after this call.
static void ggml_compute_cl_abs_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 sign (CLBLAST replacement for
// ggml_compute_forward_sgn_f32). No-op until implemented.
static void ggml_compute_cl_sgn_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 negation (CLBLAST replacement for
// ggml_compute_forward_neg_f32). No-op until implemented.
static void ggml_compute_cl_neg_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 step (CLBLAST replacement for
// ggml_compute_forward_step_f32). No-op until implemented.
static void ggml_compute_cl_step_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 tanh (CLBLAST replacement for
// ggml_compute_forward_tanh_f32). No-op until implemented.
static void ggml_compute_cl_tanh_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 ELU (CLBLAST replacement for
// ggml_compute_forward_elu_f32). No-op until implemented.
static void ggml_compute_cl_elu_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 ReLU (CLBLAST replacement for
// ggml_compute_forward_relu_f32). No-op until implemented.
static void ggml_compute_cl_relu_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 GELU (CLBLAST replacement for
// ggml_compute_forward_gelu_f32). No-op until implemented.
static void ggml_compute_cl_gelu_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 "GELU quick" (CLBLAST replacement for
// ggml_compute_forward_gelu_quick_f32). No-op until implemented.
static void ggml_compute_cl_gelu_quick_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr
// OpenCL stub for F32 SiLU (CLBLAST replacement for
// ggml_compute_forward_silu_f32). No-op until implemented.
static void ggml_compute_cl_silu_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
}
// xr NOP
// Permute is metadata-only in ggml (strides change, data does not), so the
// CL path deliberately does nothing. Takes no arguments by design.
static void ggml_cl_permute(){
}
// xr NOP
// Transpose is metadata-only in ggml (strides change, data does not), so the
// CL path deliberately does nothing. Takes no arguments by design.
static void ggml_cl_transpose(){
}
// xr NOP
// View is metadata-only (dst aliases src0's data), so the CL path is a
// deliberate no-op. Fixed: `struc` was a misspelling of `struct`, a compile
// error.
static void ggml_cl_view(const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst){
    (void) params; (void) src0; (void) dst; // intentional no-op
}
// xr NOP
// Reshape is metadata-only in ggml, so the CL path deliberately does nothing.
static void ggml_cl_reshape(const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst){
}
// wsy
// OpenCL stub for F32 RoPE (CLBLAST replacement for
// ggml_compute_forward_rope_f32). `forward` selects forward vs backward
// rotation at the call site. No-op until implemented; the caller returns
// immediately after this call.
static void ggml_cl_rope_f32(const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst,
const bool forward){
}
// wsy
// OpenCL stub for F16 RoPE (CLBLAST replacement for
// ggml_compute_forward_rope_f16). `forward` selects forward vs backward
// rotation at the call site. No-op until implemented.
static void ggml_cl_rope_f16(const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst,
const bool forward){
}
// wsy
// OpenCL stub for F32 RMS norm (CLBLAST replacement for
// ggml_compute_forward_rms_norm_f32). No-op until implemented.
static void ggml_cl_rms_norm_f32(const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst){
}
// zjl ggml_cl_add
// OpenCL stub for F32 + F32 element-wise add (CLBLAST replacement for
// ggml_compute_forward_add_f32). No-op until implemented; the caller returns
// immediately after this call.
// Fixed: the original patch defined this static function twice with an
// identical signature, which is a redefinition error in C — the duplicate
// has been removed.
static void ggml_compute_cl_add_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst){
    (void) params; (void) src0; (void) src1; (void) dst; // unimplemented stub
}
// zjl
// OpenCL stub for F16 + F16 element-wise add (CLBLAST replacement for
// ggml_compute_forward_add_f16_f16). No-op until implemented.
static void ggml_compute_cl_add_f16_f16(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
// zjl
// OpenCL stub for F16 + F32 element-wise add (CLBLAST replacement for
// ggml_compute_forward_add_f16_f32). No-op until implemented.
static void ggml_compute_cl_add_f16_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
// zjl
// OpenCL stub for quantized + F32 element-wise add (CLBLAST replacement for
// ggml_compute_forward_add_q_f32). No-op until implemented.
static void ggml_compute_cl_add_q_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
// wh ggml_compute_cl_get_rows
// OpenCL stub for get_rows on quantized src0 (CLBLAST replacement for
// ggml_compute_forward_get_rows_q). No-op until implemented; the caller
// returns immediately after this call.
static void ggml_compute_cl_get_rows_q(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
// wh
// OpenCL stub for get_rows on F16 src0 (CLBLAST replacement for
// ggml_compute_forward_get_rows_f16). No-op until implemented.
static void ggml_compute_cl_get_rows_f16(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
// wh
// OpenCL stub for get_rows on F32 src0 (CLBLAST replacement for
// ggml_compute_forward_get_rows_f32). No-op until implemented.
static void ggml_compute_cl_get_rows_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst){
}
void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) { void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
const int64_t ne0 = tensor->ne[0]; const int64_t ne0 = tensor->ne[0];
const int64_t ne1 = tensor->ne[1]; const int64_t ne1 = tensor->ne[1];

107
ggml.c
View file

@ -7115,16 +7115,28 @@ static void ggml_compute_forward_dup(
const struct ggml_tensor * src0, const struct ggml_tensor * src0,
struct ggml_tensor * dst) { struct ggml_tensor * dst) {
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_dup_same_cont(params, src0, dst);
return;
#endif
ggml_compute_forward_dup_same_cont(params, src0, dst); ggml_compute_forward_dup_same_cont(params, src0, dst);
return; return;
} }
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_dup_f16(params, src0, dst);
return;
#endif
ggml_compute_forward_dup_f16(params, src0, dst); ggml_compute_forward_dup_f16(params, src0, dst);
} break; } break;
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_dup_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_dup_f32(params, src0, dst); ggml_compute_forward_dup_f32(params, src0, dst);
} break; } break;
default: default:
@ -7157,6 +7169,11 @@ static void ggml_compute_forward_add_f32(
GGML_ASSERT( nb0 == sizeof(float)); GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float)); GGML_ASSERT(nb00 == sizeof(float));
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_add_f32(params, src0, src1, dst);
return;
#endif
// rows per thread // rows per thread
const int dr = (nr + nth - 1)/nth; const int dr = (nr + nth - 1)/nth;
@ -7244,6 +7261,11 @@ static void ggml_compute_forward_add_f16_f32(
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_add_f16_f32(params, src0, src1, dst);
return;
#endif
// rows per thread // rows per thread
const int dr = (nr + nth - 1)/nth; const int dr = (nr + nth - 1)/nth;
@ -7315,6 +7337,11 @@ static void ggml_compute_forward_add_f16_f16(
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_add_f16_f16(params, src0, src1, dst);
return;
#endif
// rows per thread // rows per thread
const int dr = (nr + nth - 1)/nth; const int dr = (nr + nth - 1)/nth;
@ -7379,6 +7406,11 @@ static void ggml_compute_forward_add_q_f32(
GGML_ASSERT(ggml_is_quantized(src0->type)); GGML_ASSERT(ggml_is_quantized(src0->type));
GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT(src1->type == GGML_TYPE_F32);
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_add_q_f32(params, src0, src1, dst);
return;
#endif
// rows per thread // rows per thread
const int dr = (nr + nth - 1)/nth; const int dr = (nr + nth - 1)/nth;
@ -7429,12 +7461,12 @@ static void ggml_compute_forward_add(
struct ggml_tensor * dst) { struct ggml_tensor * dst) {
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
ggml_compute_forward_add_f32(params, src0, src1, dst); ggml_compute_forward_add_f32(params, src0, src1, dst);
} break; } break;
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
if (src1->type == GGML_TYPE_F16) { if (src1->type == GGML_TYPE_F16) {
ggml_compute_forward_add_f16_f16(params, src0, src1, dst); ggml_compute_forward_add_f16_f16(params, src0, src1, dst);
} }
else if (src1->type == GGML_TYPE_F32) { else if (src1->type == GGML_TYPE_F32) {
@ -7455,6 +7487,7 @@ static void ggml_compute_forward_add(
case GGML_TYPE_Q5_K: case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
{ {
ggml_compute_forward_add_q_f32(params, src0, src1, dst); ggml_compute_forward_add_q_f32(params, src0, src1, dst);
} break; } break;
default: default:
@ -8765,6 +8798,10 @@ static void ggml_compute_forward_abs(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_abs_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_abs_f32(params, src0, dst); ggml_compute_forward_abs_f32(params, src0, dst);
} break; } break;
default: default:
@ -8807,6 +8844,10 @@ static void ggml_compute_forward_sgn(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_sgn_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_sgn_f32(params, src0, dst); ggml_compute_forward_sgn_f32(params, src0, dst);
} break; } break;
default: default:
@ -8849,6 +8890,10 @@ static void ggml_compute_forward_neg(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_neg_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_neg_f32(params, src0, dst); ggml_compute_forward_neg_f32(params, src0, dst);
} break; } break;
default: default:
@ -8891,6 +8936,10 @@ static void ggml_compute_forward_step(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_step_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_step_f32(params, src0, dst); ggml_compute_forward_step_f32(params, src0, dst);
} break; } break;
default: default:
@ -8933,6 +8982,10 @@ static void ggml_compute_forward_tanh(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_tanh_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_tanh_f32(params, src0, dst); ggml_compute_forward_tanh_f32(params, src0, dst);
} break; } break;
default: default:
@ -8975,6 +9028,10 @@ static void ggml_compute_forward_elu(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_elu_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_elu_f32(params, src0, dst); ggml_compute_forward_elu_f32(params, src0, dst);
} break; } break;
default: default:
@ -9017,6 +9074,10 @@ static void ggml_compute_forward_relu(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_relu_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_relu_f32(params, src0, dst); ggml_compute_forward_relu_f32(params, src0, dst);
} break; } break;
default: default:
@ -9076,6 +9137,10 @@ static void ggml_compute_forward_gelu(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_gelu_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_gelu_f32(params, src0, dst); ggml_compute_forward_gelu_f32(params, src0, dst);
} break; } break;
default: default:
@ -9135,6 +9200,10 @@ static void ggml_compute_forward_gelu_quick(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_gelu_quick_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_gelu_quick_f32(params, src0, dst); ggml_compute_forward_gelu_quick_f32(params, src0, dst);
} break; } break;
default: default:
@ -9194,6 +9263,10 @@ static void ggml_compute_forward_silu(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_silu_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_silu_f32(params, src0, dst); ggml_compute_forward_silu_f32(params, src0, dst);
} break; } break;
default: default:
@ -9440,7 +9513,11 @@ static void ggml_compute_forward_rms_norm(
struct ggml_tensor * dst) { struct ggml_tensor * dst) {
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_cl_rms_norm_f32(params, src0, dst);
return;
#endif
ggml_compute_forward_rms_norm_f32(params, src0, dst); ggml_compute_forward_rms_norm_f32(params, src0, dst);
} break; } break;
default: default:
@ -10839,14 +10916,26 @@ static void ggml_compute_forward_get_rows(
case GGML_TYPE_Q5_K: case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_get_rows_q(params, src0, src1, dst);
return;
#endif
ggml_compute_forward_get_rows_q(params, src0, src1, dst); ggml_compute_forward_get_rows_q(params, src0, src1, dst);
} break; } break;
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_get_rows_f16(params, src0, src1, dst);
return;
#endif
ggml_compute_forward_get_rows_f16(params, src0, src1, dst); ggml_compute_forward_get_rows_f16(params, src0, src1, dst);
} break; } break;
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_compute_cl_get_rows_f32(params, src0, src1, dst);
return;
#endif
ggml_compute_forward_get_rows_f32(params, src0, src1, dst); ggml_compute_forward_get_rows_f32(params, src0, src1, dst);
} break; } break;
default: default:
@ -11222,6 +11311,10 @@ static void ggml_compute_forward_soft_max(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_cl_softmax(params,src0,src1,dst);
return;
#endif
ggml_compute_forward_soft_max_f32(params, src0, src1, dst); ggml_compute_forward_soft_max_f32(params, src0, src1, dst);
} break; } break;
default: default:
@ -11946,10 +12039,18 @@ static void ggml_compute_forward_rope(
switch (src0->type) { switch (src0->type) {
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_cl_rope_f16(params, src0, src1, dst, true);
return;
#endif
ggml_compute_forward_rope_f16(params, src0, src1, dst, true); ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
} break; } break;
case GGML_TYPE_F32: case GGML_TYPE_F32:
{ {
#if defined(GGML_USE_CLBLAST)
ggml_cl_rope_f32(params, src0, src1, dst, true);
return;
#endif
ggml_compute_forward_rope_f32(params, src0, src1, dst, true); ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
} break; } break;
default: default: