From 3bfb846d6a91917b8ac334c48fcb6ba0cf8be81e Mon Sep 17 00:00:00 2001
From: jianyuzh
Date: Tue, 23 Jan 2024 23:45:56 +0800
Subject: [PATCH] fix conflict

---
 ggml.c        | 24 ------------------------
 ggml.h        |  2 +-
 llama.cpp     | 22 +++++++---------------
 sycl_build.sh | 14 --------------
 4 files changed, 8 insertions(+), 54 deletions(-)
 delete mode 100755 sycl_build.sh

diff --git a/ggml.c b/ggml.c
index 8368ada71..48f0eb7d0 100644
--- a/ggml.c
+++ b/ggml.c
@@ -14696,8 +14696,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     if (skip_cpu) {
         return;
     }
-    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
-    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
@@ -16570,28 +16568,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
 
                 //n_tasks = MIN(n_threads, MAX(1, nr0/128));
                 //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
-
-#if defined(GGML_USE_CUBLAS)
-                if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#elif defined(GGML_USE_CLBLAST)
-                if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#elif defined(GGML_USE_SYCL)
-                if (ggml_sycl_can_mul_mat(node->src[0], node->src[1], node)) {
-                    n_tasks = 1;
-                }
-#endif
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-                if (ggml_compute_forward_mul_mat_use_blas(node)) {
-                    n_tasks = 1; // TODO: this actually is doing nothing
-                                 //       the threads are still spinning
-                }
-#endif
             } break;
         case GGML_OP_MUL_MAT_ID:
             {
diff --git a/ggml.h b/ggml.h
index 5a173d362..a06e27c62 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2284,7 +2284,7 @@ extern "C" {
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
     typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
 
-    typedef struct dpct_type_994041 {
+    typedef struct {
         const char      * type_name;
         int               blck_size;
         size_t            type_size;
diff --git a/llama.cpp b/llama.cpp
index d3c9eaa79..ce74fb557 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9791,14 +9791,6 @@ struct llama_model * llama_load_model_from_file(
         struct llama_model_params params) {
     ggml_time_init();
 
-#ifdef GGML_USE_SYCL
-    int main_device = get_main_device();
-    if(main_device>=0) params.main_gpu = main_device;
-    else {
-        LLAMA_LOG_ERROR("%s: missed to init GPU device\n", __func__);
-        std::exit(1);
-    }
-#endif
     llama_model * model = new llama_model;
 
     unsigned cur_percentage = 0;
@@ -9939,13 +9931,13 @@ struct llama_context * llama_new_context_with_model(
         }
 #elif defined(GGML_USE_SYCL)
     if (model->n_gpu_layers > 0) {
-            ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
-            if (backend == nullptr) {
-                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
-                llama_free(ctx);
-                return nullptr;
-            }
-            ctx->backends.push_back(backend);
+        ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
+        if (backend == nullptr) {
+            LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
+            llama_free(ctx);
+            return nullptr;
+        }
+        ctx->backends.push_back(backend);
     }
 #endif
     ctx->backend_cpu = ggml_backend_cpu_init();
diff --git a/sycl_build.sh b/sycl_build.sh
deleted file mode 100755
index 8f2e1d22d..000000000
--- a/sycl_build.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-mkdir -p build
-cd build
-source /opt/intel/oneapi/setvars.sh
-
-#for FP16
-#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
-
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-#build example/main only
-#cmake --build . --config Release --target main
-
-#build all binary
-cmake --build . --config Release -v