diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6fe3488d9..6d2168250 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,7 +101,6 @@ option(LLAMA_CLBLAST "llama: use CLBlast"
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF)
-option(LLAMA_SYCL "llama: use SYCL" OFF)
option(LLAMA_MPI "llama: use MPI" OFF)
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
option(LLAMA_SYCL "llama: use SYCL" OFF)
@@ -124,8 +123,12 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
#
# Compile flags
#
+if (LLAMA_SYCL)
+ set(CMAKE_CXX_STANDARD 17)
+else()
+ set(CMAKE_CXX_STANDARD 11)
+endif()
-set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
@@ -452,7 +455,6 @@ if (LLAMA_SYCL)
if ( NOT DEFINED ENV{ONEAPI_ROOT})
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
endif()
-
#todo: AOT
find_package(IntelSYCL REQUIRED)
@@ -464,7 +466,7 @@ if (LLAMA_SYCL)
add_compile_options(-I./) #include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR})
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wno-narrowing")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
diff --git a/README_sycl.md b/README_sycl.md
index 993155071..c81fc7029 100644
--- a/README_sycl.md
+++ b/README_sycl.md
@@ -120,8 +120,9 @@ cd build
source /opt/intel/oneapi/setvars.sh
#for FP16
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+#for FP32
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
#build example/main only
@@ -135,9 +136,13 @@ cmake --build . --config Release -v
or
```
-./sycl_build.sh
+./examples/sycl/build.sh
```
+Note:
+
+- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only.
+
### Run
1. Put model file to folder **models**
@@ -190,9 +195,14 @@ GGML_SYCL_DEVICE=0 && ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Buildi
or run by script:
```
-./examples/sycl_run_llama2.sh
+./examples/sycl/run_llama2.sh
```
+Note:
+
+- By default, mmap is used to read model file. In some cases, it leads to the hang issue. Recommend to use parameter **--no-mmap** to disable mmap() to skip this issue.
+
+
5. Check the device ID in output
Likeļ¼
@@ -207,11 +217,10 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|Name|Value|Function|
|-|-|-|
-|LLAMA_SYCL|ON (mandatory)|Enable build with SYCL code path|
+|LLAMA_SYCL|ON (mandatory)|Enable build with SYCL code path.
For FP32/FP16, LLAMA_SYCL=ON is mandatory.|
+|LLAMA_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. Faster for long-prompt inference.
For FP32, not set it.|
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
-|GGML_SYCL_F16|OFF (default) or ON|Enable FP16 in computing|
-
#### Running
@@ -223,9 +232,17 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
## Known Issue
-- Hang during startup
+- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
+
+ Miss to enable oneAPI running environment.
+
+ Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
+
+
+- Hang during startup
llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block.
+
Solution: add **--no-mmap**.
## Todo
diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh
new file mode 100755
index 000000000..5556f3c6c
--- /dev/null
+++ b/examples/sycl/build.sh
@@ -0,0 +1,15 @@
+mkdir -p build
+cd build
+source /opt/intel/oneapi/setvars.sh
+
+#for FP16
+#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+
+#for FP32
+cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+#build example/main only
+#cmake --build . --config Release --target main
+
+#build all binary
+cmake --build . --config Release -v
diff --git a/examples/sycl/run_llama2.sh b/examples/sycl/run_llama2.sh
new file mode 100755
index 000000000..045e01a94
--- /dev/null
+++ b/examples/sycl/run_llama2.sh
@@ -0,0 +1,14 @@
+
+INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
+source /opt/intel/oneapi/setvars.sh
+
+if [ $# -gt 0 ]; then
+ export GGML_SYCL_DEVICE=$1
+else
+ export GGML_SYCL_DEVICE=0
+fi
+echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE
+#export GGML_SYCL_DEBUG=1
+./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0
+