From 3201a6df2c4748ddee05a081128af5ab2cc6435f Mon Sep 17 00:00:00 2001 From: ShuyRoy Date: Sun, 28 Jan 2024 16:36:58 +0800 Subject: [PATCH] add opencl op func and call --- ggml-opencl.cpp | 250 ++++++++++++++++++++++++++++++++++++++++++++++++ ggml.c | 107 ++++++++++++++++++++- 2 files changed, 354 insertions(+), 3 deletions(-) diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp index 496f9cdca..e79efd72b 100644 --- a/ggml-opencl.cpp +++ b/ggml-opencl.cpp @@ -1338,6 +1338,8 @@ void ggml_cl_free_data(const struct ggml_tensor* tensor) { clReleaseMemObject(mem); } + +// Copy data from the host (the CPU and its memory) to an OpenCL buffer on the device (typically the GPU and its memory). static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t offset, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, cl_event* ev) { cl_int err; const uint64_t ne0 = src->ne[0]; const uint64_t ne1 = src->ne[1]; @@ -1867,6 +1869,254 @@ size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct g return 0; } + + + +// xr +static void ggml_compute_cl_dup_same_cont( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst){ + +} + +static void ggml_compute_cl_dup_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst){ + +} + +static void ggml_compute_cl_dup_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst){ + +} + + + +// xr +static void ggml_cl_softmax(const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + +// wh +static void ggml_compute_cl_abs_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_sgn_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_neg_f32( + const 
struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_step_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_tanh_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_elu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_relu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_gelu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_gelu_quick_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr +static void ggml_compute_cl_silu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + +} + +// xr NOP +static void ggml_cl_permute(){ + +} + +// xr NOP +static void ggml_cl_transpose(){ + +} + +// xr NOP +static void ggml_cl_view(const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst){ + +} + +// xr NOP +static void ggml_cl_reshape(const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst){ + +} + + + +// wsy +static void ggml_cl_rope_f32(const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst, + const bool forward){ + +} + +// wsy +static void ggml_cl_rope_f16(const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const 
struct ggml_tensor * src1, + struct ggml_tensor * dst, + const bool forward){ + +} + +// wsy +static void ggml_cl_rms_norm_f32(const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst){ + +} + + + + +// zjl ggml_cl_add +static void ggml_compute_cl_add_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + +// zjl NOTE: duplicate definition of ggml_compute_cl_add_f32 removed (already defined above); +// keeping a second identical definition is a redefinition error: +// static void ggml_compute_cl_add_f32( +//     const struct ggml_compute_params * params, +//     const struct ggml_tensor * src0, +//     const struct ggml_tensor * src1, +//     struct ggml_tensor * dst){ +// } + +// zjl +static void ggml_compute_cl_add_f16_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + +// zjl +static void ggml_compute_cl_add_f16_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + +// zjl +static void ggml_compute_cl_add_q_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + + + + +// wh ggml_compute_cl_get_rows + +static void ggml_compute_cl_get_rows_q( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + +// wh +static void ggml_compute_cl_get_rows_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + +// wh +static void ggml_compute_cl_get_rows_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst){ + +} + + + + + + void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) { const int64_t ne0 = tensor->ne[0]; 
const int64_t ne1 = tensor->ne[1]; diff --git a/ggml.c b/ggml.c index 29a2e18a4..f7db08122 100644 --- a/ggml.c +++ b/ggml.c @@ -7115,16 +7115,28 @@ static void ggml_compute_forward_dup( const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_dup_same_cont(params, src0, dst); + return; + #endif ggml_compute_forward_dup_same_cont(params, src0, dst); return; } switch (src0->type) { case GGML_TYPE_F16: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_dup_f16(params, src0, dst); + return; + #endif ggml_compute_forward_dup_f16(params, src0, dst); } break; case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_dup_f32(params, src0, dst); + return; + #endif ggml_compute_forward_dup_f32(params, src0, dst); } break; default: @@ -7157,6 +7169,11 @@ static void ggml_compute_forward_add_f32( GGML_ASSERT( nb0 == sizeof(float)); GGML_ASSERT(nb00 == sizeof(float)); + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_add_f32(params, src0, src1, dst); + return; + #endif + // rows per thread const int dr = (nr + nth - 1)/nth; @@ -7244,6 +7261,11 @@ static void ggml_compute_forward_add_f16_f32( GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_add_f16_f32(params, src0, src1, dst); + return; + #endif + // rows per thread const int dr = (nr + nth - 1)/nth; @@ -7315,6 +7337,11 @@ static void ggml_compute_forward_add_f16_f16( GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_add_f16_f16(params, src0, src1, dst); + return; + #endif + // rows per thread const int dr = (nr + nth - 1)/nth; @@ -7379,6 +7406,11 @@ static void ggml_compute_forward_add_q_f32( GGML_ASSERT(ggml_is_quantized(src0->type)); GGML_ASSERT(src1->type == GGML_TYPE_F32); + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_add_q_f32(params, src0, 
src1, dst); + return; + #endif + // rows per thread const int dr = (nr + nth - 1)/nth; @@ -7429,12 +7461,12 @@ static void ggml_compute_forward_add( struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: - { + { ggml_compute_forward_add_f32(params, src0, src1, dst); } break; case GGML_TYPE_F16: { - if (src1->type == GGML_TYPE_F16) { + if (src1->type == GGML_TYPE_F16) { ggml_compute_forward_add_f16_f16(params, src0, src1, dst); } else if (src1->type == GGML_TYPE_F32) { @@ -7455,6 +7487,7 @@ static void ggml_compute_forward_add( case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: { + ggml_compute_forward_add_q_f32(params, src0, src1, dst); } break; default: @@ -8765,6 +8798,10 @@ static void ggml_compute_forward_abs( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_abs_f32(params, src0, dst); + return; + #endif ggml_compute_forward_abs_f32(params, src0, dst); } break; default: @@ -8807,6 +8844,10 @@ static void ggml_compute_forward_sgn( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_sgn_f32(params, src0, dst); + return; + #endif ggml_compute_forward_sgn_f32(params, src0, dst); } break; default: @@ -8849,6 +8890,10 @@ static void ggml_compute_forward_neg( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_neg_f32(params, src0, dst); + return; + #endif ggml_compute_forward_neg_f32(params, src0, dst); } break; default: @@ -8891,6 +8936,10 @@ static void ggml_compute_forward_step( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_step_f32(params, src0, dst); + return; + #endif ggml_compute_forward_step_f32(params, src0, dst); } break; default: @@ -8933,6 +8982,10 @@ static void ggml_compute_forward_tanh( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_tanh_f32(params, src0, dst); + return; + #endif ggml_compute_forward_tanh_f32(params, src0, 
dst); } break; default: @@ -8975,6 +9028,10 @@ static void ggml_compute_forward_elu( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_elu_f32(params, src0, dst); + return; + #endif ggml_compute_forward_elu_f32(params, src0, dst); } break; default: @@ -9017,6 +9074,10 @@ static void ggml_compute_forward_relu( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_relu_f32(params, src0, dst); + return; + #endif ggml_compute_forward_relu_f32(params, src0, dst); } break; default: @@ -9076,6 +9137,10 @@ static void ggml_compute_forward_gelu( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_gelu_f32(params, src0, dst); + return; + #endif ggml_compute_forward_gelu_f32(params, src0, dst); } break; default: @@ -9135,6 +9200,10 @@ static void ggml_compute_forward_gelu_quick( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_gelu_quick_f32(params, src0, dst); + return; + #endif ggml_compute_forward_gelu_quick_f32(params, src0, dst); } break; default: @@ -9194,6 +9263,10 @@ static void ggml_compute_forward_silu( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_silu_f32(params, src0, dst); + return; + #endif ggml_compute_forward_silu_f32(params, src0, dst); } break; default: @@ -9440,7 +9513,11 @@ static void ggml_compute_forward_rms_norm( struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: - { + { + #if defined(GGML_USE_CLBLAST) + ggml_cl_rms_norm_f32(params, src0, dst); + return; + #endif ggml_compute_forward_rms_norm_f32(params, src0, dst); } break; default: @@ -10839,14 +10916,26 @@ static void ggml_compute_forward_get_rows( case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_get_rows_q(params, src0, src1, dst); + return; + #endif ggml_compute_forward_get_rows_q(params, src0, src1, dst); } break; case 
GGML_TYPE_F16: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_get_rows_f16(params, src0, src1, dst); + return; + #endif ggml_compute_forward_get_rows_f16(params, src0, src1, dst); } break; case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_compute_cl_get_rows_f32(params, src0, src1, dst); + return; + #endif ggml_compute_forward_get_rows_f32(params, src0, src1, dst); } break; default: @@ -11222,6 +11311,10 @@ static void ggml_compute_forward_soft_max( switch (src0->type) { case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_cl_softmax(params,src0,src1,dst); + return; + #endif ggml_compute_forward_soft_max_f32(params, src0, src1, dst); } break; default: @@ -11946,10 +12039,18 @@ static void ggml_compute_forward_rope( switch (src0->type) { case GGML_TYPE_F16: { + #if defined(GGML_USE_CLBLAST) + ggml_cl_rope_f16(params, src0, src1, dst, true); + return; + #endif ggml_compute_forward_rope_f16(params, src0, src1, dst, true); } break; case GGML_TYPE_F32: { + #if defined(GGML_USE_CLBLAST) + ggml_cl_rope_f32(params, src0, src1, dst, true); + return; + #endif ggml_compute_forward_rope_f32(params, src0, src1, dst, true); } break; default: