Set n_threads=1 when built with SYCL and the model is fully offloaded, to improve performance

2024-01-15 14:33:52 +08:00 · 2024-01-15 14:33:52 +08:00 · a8936f4902
commit a8936f4902
parent 95daece908
2 changed files with 2 additions and 1 deletions
--- a/ggml.h
+++ b/ggml.h
@@ -2267,6 +2267,7 @@ extern "C" {
    GGML_API int ggml_cpu_has_gpublas    (void);
    GGML_API int ggml_cpu_has_sse3       (void);
    GGML_API int ggml_cpu_has_ssse3      (void);
+    GGML_API int ggml_cpu_has_sycl       (void);
    GGML_API int ggml_cpu_has_vsx        (void);
    //
--- a/llama.cpp
+++ b/llama.cpp
@@ -6703,7 +6703,7 @@ static int llama_decode_internal(
    }
    const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if (ggml_cpu_has_cublas() && fully_offloaded) {
+    if ((ggml_cpu_has_cublas() || ggml_cpu_has_sycl()) && fully_offloaded) {
        n_threads = 1;
    }