add build&run script, clean CMakefile, update guide by review comments

2024-01-23 14:16:01 +08:00 · 2024-01-23 14:16:01 +08:00 · a0a1304b0c
commit a0a1304b0c
parent 533c647d0e
4 changed files with 59 additions and 11 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -101,7 +101,6 @@ option(LLAMA_CLBLAST                         "llama: use CLBlast"
 option(LLAMA_METAL                           "llama: use Metal"                                 ${LLAMA_METAL_DEFAULT})
 option(LLAMA_METAL_NDEBUG                    "llama: disable Metal debugging"                   OFF)
 option(LLAMA_METAL_SHADER_DEBUG              "llama: compile Metal with -fno-fast-math"         OFF)
-option(LLAMA_SYCL                            "llama: use SYCL"                                  OFF)
 option(LLAMA_MPI                             "llama: use MPI"                                   OFF)
 option(LLAMA_QKK_64                          "llama: use super-block size of 64 for k-quants"   OFF)
 option(LLAMA_SYCL                            "llama: use SYCL"                                  OFF)
@ -124,8 +123,12 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
 #
 # Compile flags
 #
+if (LLAMA_SYCL)
+    set(CMAKE_CXX_STANDARD 17) # the SYCL build is compiled as C++17
+else()
+    set(CMAKE_CXX_STANDARD 11) # every other configuration stays on C++11
+endif()

-set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED true)
@ -452,7 +455,6 @@ if (LLAMA_SYCL)
    if ( NOT DEFINED ENV{ONEAPI_ROOT})
 	message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
    endif()
-    
    #todo: AOT

    find_package(IntelSYCL REQUIRED)
@ -464,7 +466,7 @@ if (LLAMA_SYCL)
    add_compile_options(-I./) #include DPCT 
    add_compile_options(-I/${SYCL_INCLUDE_DIR})

-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wno-narrowing")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")

--- a/README_sycl.md
+++ b/README_sycl.md
@ -120,8 +120,9 @@ cd build
 source /opt/intel/oneapi/setvars.sh

 #for FP16
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference

+#for FP32
 cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 #build example/main only
@ -135,9 +136,13 @@ cmake --build . --config Release -v
 or

 ```
-./sycl_build.sh
+./examples/sycl/build.sh
 ```

+Note:
+
+- By default, all binaries are built, which takes longer. To reduce build time, we recommend building **example/main** only.
+
 ### Run

 1. Put model file to folder **models**
@ -190,9 +195,14 @@ GGML_SYCL_DEVICE=0 && ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Buildi
 or run by script:

 ```
-./examples/sycl_run_llama2.sh
+./examples/sycl/run_llama2.sh
 ```

+Note:
+
+- By default, mmap is used to read the model file. In some cases this causes a hang. If that happens, add the **--no-mmap** parameter to disable mmap() and avoid the issue.
+
+
 5. Check the device ID in output

 Like：
@ -207,11 +217,10 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device

 |Name|Value|Function|
 |-|-|-|
-|LLAMA_SYCL|ON (mandatory)|Enable build with SYCL code path|
+|LLAMA_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, LLAMA_SYCL=ON is mandatory.|
+|LLAMA_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. Faster for long-prompt inference. <br>For FP32, do not set it.|
 |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
 |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
-|GGML_SYCL_F16|OFF (default) or ON|Enable FP16 in computing|
-

 #### Running

@ -223,9 +232,17 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device

 ## Known Issue

+- Error:  `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
+
+  The oneAPI runtime environment has not been enabled.
+
+  Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
+
+
 - Hang during startup

  llama.cpp use mmap as default way to read model file and copy to GPU. In some system, memcpy will be abnormal and block.
+
  Solution: add **--no-mmap**.

 ## Todo
--- a/examples/sycl/build.sh
+++ b/examples/sycl/build.sh
@ -0,0 +1,15 @@
+#!/bin/bash
+# Configure and build with the SYCL backend (FP32 by default) in ./build; run from the directory that holds CMakeLists.txt.
+mkdir -p build && cd build || exit 1  # never run "cmake .." from the wrong directory
+source /opt/intel/oneapi/setvars.sh
+
+# FP16 build (faster for long-prompt inference): uncomment this line and comment out the FP32 line below.
+#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+
+# FP32 build (default); stop here if configuration fails so a stale build tree is not rebuilt.
+cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx || exit 1
+
+# Build example/main only (faster): uncomment this line and comment out the full build below.
+#cmake --build . --config Release --target main
+# Build all binaries, with verbose output.
+cmake --build . --config Release -v
--- a/examples/sycl/run_llama2.sh
+++ b/examples/sycl/run_llama2.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+# Run the llama-2-7b Q4_0 example on a SYCL device. Usage (from the repository root): run_llama2.sh [device_id], default 0.
+INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
+source /opt/intel/oneapi/setvars.sh
+
+if [ $# -gt 0 ]; then
+  export GGML_SYCL_DEVICE="$1"
+else
+  export GGML_SYCL_DEVICE=0
+fi
+echo "GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE"
+#export GGML_SYCL_DEBUG=1
+./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0