diff --git a/common/common.cpp b/common/common.cpp
index 8d7bba9f5..664d0638d 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -641,8 +641,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 params.split_mode = LLAMA_SPLIT_MODE_LAYER;
             } else if (arg_next == "row") {
 #ifdef GGML_USE_SYCL
-                fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
-                exit(1);
+                fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
+                exit(1);
 #endif // GGML_USE_SYCL
                 params.split_mode = LLAMA_SPLIT_MODE_ROW;
             } else {
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 6575e28e3..f21bd23bd 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -123,8 +123,6 @@ static std::string get_gpu_info() {
     }
 #endif
 #ifdef GGML_USE_SYCL
-    int device_list[GGML_SYCL_MAX_DEVICES];
-    ggml_sycl_get_gpu_list(device_list, GGML_SYCL_MAX_DEVICES);
     int count = ggml_backend_sycl_get_device_count();
     for (int i = 0; i < count; i++) {
         char buf[128];
diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
index 8ad0bc047..52f7c01a4 100755
--- a/examples/sycl/run-llama2.sh
+++ b/examples/sycl/run-llama2.sh
@@ -19,7 +19,7 @@ echo "use $GGML_SYCL_DEVICE as main GPU"
 #ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer.
 
 #use all GPUs with same max compute units
-ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE
+ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
 
 #use main GPU only
 #ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
diff --git a/llama.cpp b/llama.cpp
index 82d7cfab1..7d66b1624 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -104,9 +104,6 @@
 #define LLAMA_MAX_NODES   8192
 #define LLAMA_MAX_EXPERTS 8
 
-#if (defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL))
-#define GGML_USE_CUBLAS_SYCL
-#endif
 
 //
 // logging
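
Note: the llama-bench hunk above truncates the SYCL branch of get_gpu_info() at `char buf[128];`. For context, here is a minimal sketch of what that branch looks like after dropping the ggml_sycl_get_gpu_list() pre-scan, with device discovery going through the backend API alone. The call to ggml_sycl_get_device_description() is an assumption based on the surrounding (truncated) loop body, not something shown in the hunk:

```cpp
// Sketch only: SYCL device enumeration via the ggml backend API,
// mirroring the simplified get_gpu_info() path in llama-bench.cpp.
#include <string>

#include "ggml-sycl.h"

static std::string get_sycl_gpu_info() {
    std::string id;
    // ask the SYCL backend how many devices it sees; no separate
    // device-list pre-scan is needed anymore
    int count = ggml_backend_sycl_get_device_count();
    for (int i = 0; i < count; i++) {
        char buf[128];
        // assumed helper from ggml-sycl.h: fills buf with the device name
        ggml_sycl_get_device_description(i, buf, sizeof(buf));
        id += buf;
        if (i < count - 1) {
            id += "/"; // separate multiple device names
        }
    }
    return id;
}
```

The run-llama2.sh change follows the same idea: with the layer split mode, all GPUs are used, so pinning a main GPU with `-mg` is only needed in the single-GPU (`-sm none`) invocation, which keeps the flag.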