llama : disable moe offloading with SYCL
This commit is contained in:
parent
bd17f27ce2
commit
ba5b5467d1
2 changed files with 8 additions and 1 deletion
|
@@ -17752,7 +17752,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
||||||
|
|
||||||
// Decide whether it is worthwhile to offload `op` to the SYCL backend.
// Only batches of at least `min_batch_size` rows amortize the host<->device
// transfer cost. GGML_OP_GET_ROWS is always kept on the host, and
// GGML_OP_MUL_MAT_ID (MoE expert routing) is excluded until the SYCL
// mul_mat_id kernel is updated (see the companion change in llm_load_tensors).
GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
    GGML_UNUSED(backend);

    const int min_batch_size = 32;

    return op->ne[1] >= min_batch_size
        && op->op != GGML_OP_GET_ROWS
        && op->op != GGML_OP_MUL_MAT_ID;
}
|
||||||
|
|
||||||
|
|
|
@@ -4495,6 +4495,13 @@ static bool llm_load_tensors(
|
||||||
|
|
||||||
auto & hparams = model.hparams;
|
auto & hparams = model.hparams;
|
||||||
|
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
// disable MoE with SYCL until mul_mat_id is updated
|
||||||
|
if (hparams.n_expert > 0) {
|
||||||
|
n_gpu_layers = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
model.split_mode = split_mode;
|
model.split_mode = split_mode;
|
||||||
model.main_gpu = main_gpu;
|
model.main_gpu = main_gpu;
|
||||||
model.n_gpu_layers = n_gpu_layers;
|
model.n_gpu_layers = n_gpu_layers;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue