From 533c647d0ea3e73800e820fc5950b749e924cae5 Mon Sep 17 00:00:00 2001
From: jianyuzh <jianyu.zhang@intel.com>
Date: Tue, 23 Jan 2024 13:34:05 +0800
Subject: [PATCH] llama: use a single thread when fully offloaded to SYCL, for better performance

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 2dfff45c3..d3c9eaa79 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6703,7 +6703,7 @@ static int llama_decode_internal(
     }
 
     const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if (ggml_cpu_has_cublas() && fully_offloaded) {
+    if ((ggml_cpu_has_cublas() || ggml_cpu_has_sycl()) && fully_offloaded) {
         n_threads = 1;
     }