diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fe3488d9..6d2168250 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,7 +101,6 @@ option(LLAMA_CLBLAST "llama: use CLBlast" option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_SYCL "llama: use SYCL" OFF) option(LLAMA_MPI "llama: use MPI" OFF) option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) option(LLAMA_SYCL "llama: use SYCL" OFF) @@ -124,8 +123,12 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) # # Compile flags # +if (LLAMA_SYCL) + set(CMAKE_CXX_STANDARD 17) +else() + set(CMAKE_CXX_STANDARD 11) +endif() -set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED true) @@ -452,7 +455,6 @@ if (LLAMA_SYCL) if ( NOT DEFINED ENV{ONEAPI_ROOT}) message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") endif() - #todo: AOT find_package(IntelSYCL REQUIRED) @@ -464,7 +466,7 @@ if (LLAMA_SYCL) add_compile_options(-I./) #include DPCT add_compile_options(-I/${SYCL_INCLUDE_DIR}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wno-narrowing") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") diff --git a/README_sycl.md b/README_sycl.md index 993155071..c81fc7029 100644 --- a/README_sycl.md +++ b/README_sycl.md @@ -120,8 +120,9 @@ cd build source /opt/intel/oneapi/setvars.sh #for FP16 -cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference +#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference +#for FP32 cmake .. 
-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx #build example/main only @@ -135,9 +136,13 @@ cmake --build . --config Release -v or ``` -./sycl_build.sh +./examples/sycl/build.sh ``` +Note: + +- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only. + ### Run 1. Put model file to folder **models** @@ -190,9 +195,14 @@ GGML_SYCL_DEVICE=0 && ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Buildi or run by script: ``` -./examples/sycl_run_llama2.sh +./examples/sycl/run_llama2.sh ``` +Note: + +- By default, mmap is used to read the model file. In some cases, it leads to a hang at startup. We recommend using the parameter **--no-mmap** to disable mmap() and avoid this issue. + + 5. Check the device ID in output Like: @@ -207,11 +217,10 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device |Name|Value|Function| |-|-|-| -|LLAMA_SYCL|ON (mandatory)|Enable build with SYCL code path| +|LLAMA_SYCL|ON (mandatory)|Enable build with SYCL code path.
For FP32/FP16, LLAMA_SYCL=ON is mandatory.| +|LLAMA_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. Faster for long-prompt inference.
For FP32, do not set it.| |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path| |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path| -|GGML_SYCL_F16|OFF (default) or ON|Enable FP16 in computing| - #### Running @@ -223,9 +232,17 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device ## Known Issue -- Hang during startup +- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`. + + The oneAPI runtime environment is not enabled. + + Install the oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`. + + +- Hang during startup llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block. + Solution: add **--no-mmap**. ## Todo diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh new file mode 100755 index 000000000..5556f3c6c --- /dev/null +++ b/examples/sycl/build.sh @@ -0,0 +1,15 @@ +mkdir -p build +cd build +source /opt/intel/oneapi/setvars.sh + +#for FP16 +#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference + +#for FP32 +cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + +#build example/main only +#cmake --build . --config Release --target main + +#build all binaries +cmake --build . 
--config Release -v diff --git a/examples/sycl/run_llama2.sh b/examples/sycl/run_llama2.sh new file mode 100755 index 000000000..045e01a94 --- /dev/null +++ b/examples/sycl/run_llama2.sh @@ -0,0 +1,14 @@ + +INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" +source /opt/intel/oneapi/setvars.sh + +if [ $# -gt 0 ]; then + export GGML_SYCL_DEVICE=$1 +else + export GGML_SYCL_DEVICE=0 +fi +echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE +#export GGML_SYCL_DEBUG=1 +./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 +#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0 +