From 3bfb846d6a91917b8ac334c48fcb6ba0cf8be81e Mon Sep 17 00:00:00 2001
From: jianyuzh
Date: Tue, 23 Jan 2024 23:45:56 +0800
Subject: [PATCH] fix conflict

---
 ggml.c        | 24 ------------------------
 ggml.h        |  2 +-
 llama.cpp     | 22 +++++++---------------
 sycl_build.sh | 14 --------------
 4 files changed, 8 insertions(+), 54 deletions(-)
 delete mode 100755 sycl_build.sh

diff --git a/ggml.c b/ggml.c
index 8368ada71..48f0eb7d0 100644
--- a/ggml.c
+++ b/ggml.c
@@ -14696,8 +14696,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     if (skip_cpu) {
         return;
     }
-    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
-    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
@@ -16570,28 +16568,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
 
                 //n_tasks = MIN(n_threads, MAX(1, nr0/128));
                 //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
-
-#if defined(GGML_USE_CUBLAS)
-                if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#elif defined(GGML_USE_CLBLAST)
-                if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#elif defined(GGML_USE_SYCL)
-                if (ggml_sycl_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1;
-                }
-#endif
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-                if (ggml_compute_forward_mul_mat_use_blas(node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#endif
             } break;
         case GGML_OP_MUL_MAT_ID:
             {
diff --git a/ggml.h b/ggml.h
index 5a173d362..a06e27c62 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2284,7 +2284,7 @@ extern "C" {
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
     typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
 
-    typedef struct dpct_type_994041 {
+    typedef struct {
         const char      * type_name;
         int               blck_size;
         size_t            type_size;
diff --git a/llama.cpp b/llama.cpp
index d3c9eaa79..ce74fb557 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9791,14 +9791,6 @@ struct llama_model * llama_load_model_from_file(
         struct llama_model_params params) {
     ggml_time_init();
 
-#ifdef GGML_USE_SYCL
-    int main_device = get_main_device();
-    if(main_device>=0) params.main_gpu = main_device;
-    else {
-        LLAMA_LOG_ERROR("%s: missed to init GPU device\n", __func__);
-        std::exit(1);
-    }
-#endif
     llama_model * model = new llama_model;
 
     unsigned cur_percentage = 0;
@@ -9939,13 +9931,13 @@ struct llama_context * llama_new_context_with_model(
         }
 #elif defined(GGML_USE_SYCL)
     if (model->n_gpu_layers > 0) {
-            ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
-            if (backend == nullptr) {
-                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
-                llama_free(ctx);
-                return nullptr;
-            }
-            ctx->backends.push_back(backend);
+        ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
+        if (backend == nullptr) {
+            LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
+            llama_free(ctx);
+            return nullptr;
+        }
+        ctx->backends.push_back(backend);
     }
 #endif
     ctx->backend_cpu = ggml_backend_cpu_init();
diff --git a/sycl_build.sh b/sycl_build.sh
deleted file mode 100755
index 8f2e1d22d..000000000
--- a/sycl_build.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-mkdir -p build
-cd build
-source /opt/intel/oneapi/setvars.sh
-
-#for FP16
-#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
-
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-#build example/main only
-#cmake --build . --config Release --target main
-
-#build all binary
-cmake --build . --config Release -v