Fix FP32/FP16 build instructions
This commit is contained in:
parent
637e9a86c2
commit
e1f6992d7c
1 changed files with 13 additions and 8 deletions
|
@ -229,12 +229,12 @@ source /opt/intel/oneapi/setvars.sh
|
||||||
# Build LLAMA with MKL BLAS acceleration for intel GPU
|
# Build LLAMA with MKL BLAS acceleration for intel GPU
|
||||||
mkdir -p build && cd build
|
mkdir -p build && cd build
|
||||||
|
|
||||||
# Option 1: Use FP16 for better performance in long-prompt inference
|
# Option 1: Use FP32 by default
|
||||||
#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
|
||||||
|
|
||||||
# Option 2: Use FP32 by default
|
|
||||||
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
|
# Option 2: Use FP16 for better performance in long-prompt inference
|
||||||
|
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
||||||
|
|
||||||
#build all binary
|
#build all binary
|
||||||
cmake --build . --config Release -j -v
|
cmake --build . --config Release -j -v
|
||||||
```
|
```
|
||||||
|
@ -250,12 +250,12 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
|
||||||
# Build LLAMA with Nvidia BLAS acceleration through SYCL
|
# Build LLAMA with Nvidia BLAS acceleration through SYCL
|
||||||
mkdir -p build && cd build
|
mkdir -p build && cd build
|
||||||
|
|
||||||
# Option 1: Use FP16 for better performance in long-prompt inference
|
# Option 1: Use FP32 by default
|
||||||
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
|
||||||
|
|
||||||
# Option 2: Use FP32 by default
|
|
||||||
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
|
# Option 2: Use FP16 for better performance in long-prompt inference
|
||||||
|
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
||||||
|
|
||||||
#build all binary
|
#build all binary
|
||||||
cmake --build . --config Release -j -v
|
cmake --build . --config Release -j -v
|
||||||
|
|
||||||
|
@ -416,8 +416,13 @@ mkdir -p build
|
||||||
cd build
|
cd build
|
||||||
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
|
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
|
||||||
|
|
||||||
|
# Option 1: Use FP32 by default
|
||||||
|
cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
|
||||||
|
|
||||||
|
# Option 2: Or FP16
|
||||||
cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
|
cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
|
||||||
|
|
||||||
|
|
||||||
make -j
|
make -j
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue