leave the scheduling to ggml_backend_sched entirely

Meng, Hengyu 2024-03-23 08:29:54 +00:00
parent 4b9f3b432b
commit 0c2aa1a249
2 changed files with 17 additions and 25 deletions

ggml.c (6 changes)

@@ -16059,12 +16059,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
 #endif // GGML_USE_VULKAN
 
-#ifdef GGML_USE_SYCL
-    bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
-    if (skip_cpu) {
-        return;
-    }
-#endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
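With the GGML_USE_SYCL fast-path gone, ggml_compute_forward() no longer decides where an op runs; that choice belongs entirely to the graph scheduler. The following is a minimal sketch of the path that takes over, not code from this commit: the two-backend setup, device index 0, and the five-argument ggml_backend_sched_new form are assumptions about the ggml-backend API of this period.

// Sketch: dispatch through ggml_backend_sched instead of a per-op
// GGML_USE_SYCL branch in ggml_compute_forward. Error handling omitted.
#include "ggml.h"
#include "ggml-backend.h"
#include "ggml-sycl.h"

static void compute_with_sched(struct ggml_cgraph * graph) {
    ggml_backend_t backends[2] = {
        ggml_backend_sycl_init(0), // device index 0 is illustrative
        ggml_backend_cpu_init(),   // CPU fallback; the scheduler expects it last
    };

    // NULL buffer types: the scheduler uses each backend's default.
    ggml_backend_sched_t sched =
        ggml_backend_sched_new(backends, NULL, 2, GGML_DEFAULT_GRAPH_SIZE, false);

    // The scheduler splits the graph, assigns every node to a backend,
    // and runs each split there; unsupported ops fall back to the CPU.
    ggml_backend_sched_graph_compute(sched, graph);

    ggml_backend_sched_free(sched);
    ggml_backend_free(backends[0]);
    ggml_backend_free(backends[1]);
}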

llama.cpp (36 changes)

@@ -13158,7 +13158,6 @@ struct llama_context * llama_new_context_with_model(
             }
         }
 #elif defined(GGML_USE_SYCL)
-        if (model->n_gpu_layers > 0) {
             // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
             if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
                 ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
@@ -13183,7 +13182,6 @@ struct llama_context * llama_new_context_with_model(
                     ctx->backends.push_back(backend);
                 }
             }
-        }
 #elif defined(GGML_USE_KOMPUTE)
         if (model->n_gpu_layers > 0) {
             auto * backend = ggml_backend_kompute_init(model->main_gpu);
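On the llama.cpp side, dropping the n_gpu_layers guard means the SYCL backends are created whenever SYCL support is compiled in; how many layers actually land on them is left to ggml_backend_sched and buffer assignment rather than to an early-out here. A hedged sketch of the resulting shape follows: init_sycl_backends and main_gpu_only are hypothetical names, and the SYCL init calls assume the ggml-sycl API of this period.

// Sketch of backend-list construction once the guard is removed: backends
// are always created per split mode, with the CPU backend appended last.
#include <vector>
#include "ggml-backend.h"
#include "ggml-sycl.h"

static std::vector<ggml_backend_t> init_sycl_backends(int main_gpu, bool main_gpu_only) {
    std::vector<ggml_backend_t> backends;
    if (main_gpu_only) {
        // LLAMA_SPLIT_MODE_NONE / LLAMA_SPLIT_MODE_ROW: main GPU only
        backends.push_back(ggml_backend_sycl_init(main_gpu));
    } else {
        // LLAMA_SPLIT_MODE_LAYER: one backend per SYCL device
        for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
            backends.push_back(ggml_backend_sycl_init(i));
        }
    }
    backends.push_back(ggml_backend_cpu_init()); // CPU last, as the scheduler expects
    return backends;
}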