leave the schedule to ggml_backend_sched entirely
commit 0c2aa1a249
parent 4b9f3b432b

2 changed files with 17 additions and 25 deletions
ggml.c (6 changes)
@@ -16059,12 +16059,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
 #endif // GGML_USE_VULKAN
 
-#ifdef GGML_USE_SYCL
-    bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
-    if (skip_cpu) {
-        return;
-    }
-#endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
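The removed block was the last place where the CPU path itself decided whether an op should be skipped in favour of SYCL. After this commit that decision is left entirely to ggml_backend_sched, which splits the graph across the available backends and runs each split on its assigned backend. A minimal sketch of that flow follows, assuming the ggml-backend scheduler API of this period; the exact ggml_backend_sched_new argument list and the graph-compute return type have changed between ggml versions, so treat it as illustrative rather than a quote of this commit.

// Sketch only, not part of this commit: graph execution via ggml_backend_sched.
// The scheduler assigns every node to one of the given backends (e.g. SYCL + CPU)
// and inserts the tensor copies between backend buffers itself, so per-op
// "skip_cpu" checks like the one removed above are no longer needed.
#include "ggml.h"
#include "ggml-backend.h"

static void compute_with_sched(ggml_backend_t * backends, int n_backends,
                               struct ggml_cgraph * graph) {
    // NULL buffer types -> each backend's default buffer type is used.
    // Newer ggml versions take extra arguments here (e.g. a parallel flag).
    ggml_backend_sched_t sched = ggml_backend_sched_new(
        backends, NULL, n_backends, GGML_DEFAULT_GRAPH_SIZE);

    // Allocates the splits and runs each one on its assigned backend.
    // (Return type is bool or enum ggml_status depending on the ggml version.)
    ggml_backend_sched_graph_compute(sched, graph);

    ggml_backend_sched_free(sched);
}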
llama.cpp (36 changes)
@@ -13158,30 +13158,28 @@ struct llama_context * llama_new_context_with_model(
             }
         }
 #elif defined(GGML_USE_SYCL)
-        if (model->n_gpu_layers > 0) {
-            // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
-            if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
-                ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
+        // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
+        if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
+            ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
+            if (backend == nullptr) {
+                int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
+                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
+                llama_free(ctx);
+                return nullptr;
+            }
+            ctx->backends.push_back(backend);
+        } else {
+            // LLAMA_SPLIT_LAYER requires a backend for each GPU
+            for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
+                ggml_backend_t backend = ggml_backend_sycl_init(i);
                 if (backend == nullptr) {
-                    int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
-                    LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
+                    int id_list[GGML_SYCL_MAX_DEVICES];
+                    ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
+                    LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
                     llama_free(ctx);
                     return nullptr;
                 }
                 ctx->backends.push_back(backend);
-            } else {
-                // LLAMA_SPLIT_LAYER requires a backend for each GPU
-                for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
-                    ggml_backend_t backend = ggml_backend_sycl_init(i);
-                    if (backend == nullptr) {
-                        int id_list[GGML_SYCL_MAX_DEVICES];
-                        ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
-                        LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
-                        llama_free(ctx);
-                        return nullptr;
-                    }
-                    ctx->backends.push_back(backend);
-                }
             }
         }
 #elif defined(GGML_USE_KOMPUTE)
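Recast outside llama_new_context_with_model, the new selection logic reads like the sketch below. collect_sycl_backends is a hypothetical helper name and error handling is reduced to a bool; the calls and the split-mode branching are the ones added in the diff above. The point of the change is visible here: there is no n_gpu_layers check left, because whether layers actually run on SYCL is now decided later by ggml_backend_sched.

// Sketch only: the SYCL backend-selection branch added above, as a standalone
// helper. The helper name is hypothetical; the API calls mirror the diff.
#include <vector>
#include "llama.h"        // llama_split_mode, LLAMA_SPLIT_MODE_*
#include "ggml-backend.h" // ggml_backend_t
#include "ggml-sycl.h"    // ggml_backend_sycl_init, ggml_backend_sycl_get_device_count

static bool collect_sycl_backends(llama_split_mode split_mode, int main_gpu,
                                  std::vector<ggml_backend_t> & backends) {
    if (split_mode == LLAMA_SPLIT_MODE_NONE || split_mode == LLAMA_SPLIT_MODE_ROW) {
        // NONE/ROW: only the main GPU backend is used
        ggml_backend_t backend = ggml_backend_sycl_init(main_gpu);
        if (backend == nullptr) {
            return false;
        }
        backends.push_back(backend);
    } else {
        // LLAMA_SPLIT_MODE_LAYER: one backend per SYCL device
        for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
            ggml_backend_t backend = ggml_backend_sycl_init(i);
            if (backend == nullptr) {
                return false;
            }
            backends.push_back(backend);
        }
    }
    return true;
}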