diff --git a/ggml.c b/ggml.c index 1d5854960..d1a2775f2 100644 --- a/ggml.c +++ b/ggml.c @@ -16059,12 +16059,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU); #endif // GGML_USE_VULKAN -#ifdef GGML_USE_SYCL - bool skip_cpu = ggml_sycl_compute_forward(params, tensor); - if (skip_cpu) { - return; - } -#endif // GGML_USE_SYCL switch (tensor->op) { case GGML_OP_DUP: { diff --git a/llama.cpp b/llama.cpp index 1a9fe0c4d..8bc894ade 100644 --- a/llama.cpp +++ b/llama.cpp @@ -13158,30 +13158,28 @@ struct llama_context * llama_new_context_with_model( } } #elif defined(GGML_USE_SYCL) - if (model->n_gpu_layers > 0) { - // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used - if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) { - ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu); + // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used + if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) { + ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu); + if (backend == nullptr) { + int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu); + LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu); + llama_free(ctx); + return nullptr; + } + ctx->backends.push_back(backend); + } else { + // LLAMA_SPLIT_LAYER requires a backend for each GPU + for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) { + ggml_backend_t backend = ggml_backend_sycl_init(i); if (backend == nullptr) { - int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu); - LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu); + int id_list[GGML_SYCL_MAX_DEVICES]; + ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES); + LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i); llama_free(ctx); return nullptr; } ctx->backends.push_back(backend); - } else { - // LLAMA_SPLIT_LAYER requires a backend for each GPU - for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) { - ggml_backend_t backend = ggml_backend_sycl_init(i); - if (backend == nullptr) { - int id_list[GGML_SYCL_MAX_DEVICES]; - ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES); - LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i); - llama_free(ctx); - return nullptr; - } - ctx->backends.push_back(backend); - } } } #elif defined(GGML_USE_KOMPUTE)