diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile
index 11937772d..b42b33077 100644
--- a/.devops/main-intel.Dockerfile
+++ b/.devops/main-intel.Dockerfile
@@ -14,7 +14,7 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
     echo "LLAMA_SYCL_F16 is set" && \
     export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake . -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
     cmake --build build --target main

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/main-vulkan.Dockerfile b/.devops/main-vulkan.Dockerfile
index 6b1d2dff7..51016048f 100644
--- a/.devops/main-vulkan.Dockerfile
+++ b/.devops/main-vulkan.Dockerfile
@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake . -B build -DLLAMA_VULKAN=1 && \
+RUN cmake -B build -DLLAMA_VULKAN=1 && \
     cmake --build build --target main

 # Clean up
diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile
index 4763c9f0a..17bb69f2f 100644
--- a/.devops/server-intel.Dockerfile
+++ b/.devops/server-intel.Dockerfile
@@ -14,7 +14,7 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
     echo "LLAMA_SYCL_F16 is set" && \
     export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake . -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --target server

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile
index 51f90e12c..03b976895 100644
--- a/.devops/server-vulkan.Dockerfile
+++ b/.devops/server-vulkan.Dockerfile
@@ -18,7 +18,7 @@ RUN apt-get update && \
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake . -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
     cmake --build build --target server

 # Clean up
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index bb4a9fdc9..e91e1764b 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -96,7 +96,7 @@ jobs:
         id: cmake_build
         run: |
           set -eux
-          cmake . -B build \
+          cmake -B build \
             -DLLAMA_NATIVE=OFF \
             -DLLAMA_BUILD_SERVER=ON \
             -DLLAMA_CURL=ON \
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index 1d6286aa3..dcc57d40b 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -94,7 +94,7 @@
       - name: Build
        id: cmake_build
        run: |
-          cmake . -B build \
+          cmake -B build \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
@@ -141,7 +141,7 @@
       - name: Build
        id: cmake_build
        run: |
-          cmake . -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build build -j ${env:NUMBER_OF_PROCESSORS} --target server

       - name: Python setup
diff --git a/README-sycl.md b/README-sycl.md
index 58081674f..1cc411270 100644
--- a/README-sycl.md
+++ b/README-sycl.md
@@ -228,10 +228,10 @@ source /opt/intel/oneapi/setvars.sh
 # Build LLAMA with MKL BLAS acceleration for intel GPU

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake . -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 # Option 2: Use FP16
-cmake . -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON

 # build all binary
 cmake --build build -j -v
@@ -248,10 +248,10 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
 # Build LLAMA with Nvidia BLAS acceleration through SYCL

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake . -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 # Option 2: Use FP16
-cmake . -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON

 # build all binary
 cmake --build build -j -v
diff --git a/README.md b/README.md
index 2203d3fbb..f901ed24d 100644
--- a/README.md
+++ b/README.md
@@ -321,7 +321,7 @@ In order to build llama.cpp you have three different options.
 - Using `CMake`:

   ```bash
-  cmake . -B build # Note: add -DCMAKE_BUILD_TYPE=Debug here for debug builds
+  cmake -B build # Note: add -DCMAKE_BUILD_TYPE=Debug here for debug builds
   cmake --build build
   ```

@@ -436,7 +436,7 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake` on Linux:

     ```bash
-    cmake . -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+    cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
     cmake --build build
     ```

@@ -458,7 +458,7 @@ Building the program with BLAS support may lead to some performance improvements
   By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps:
   ```bash
   source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-basekit docker image, only required for manual installation
-  cmake . -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+  cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
   cmake --build build
   ```

@@ -480,7 +480,7 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake`:

     ```bash
-    cmake . -B build -DLLAMA_CUDA=ON
+    cmake -B build -DLLAMA_CUDA=ON
     cmake --build build
     ```

@@ -556,7 +556,7 @@ Building the program with BLAS support may lead to some performance improvements
     ```sh
     git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
     cd OpenCL-SDK
-    cmake . -B build -DBUILD_DOCS=OFF \
+    cmake -B build -DBUILD_DOCS=OFF \
       -DBUILD_EXAMPLES=OFF \
       -DBUILD_TESTING=OFF \
       -DOPENCL_SDK_BUILD_SAMPLES=OFF \
@@ -585,7 +585,7 @@ Building the program with BLAS support may lead to some performance improvements
     set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
     git clone https://github.com/CNugteren/CLBlast.git
     cd CLBlast
-    cmake . -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
     cmake --build build --config Release
     cmake --install build --prefix C:/CLBlast
     ```
@@ -598,7 +598,7 @@ Building the program with BLAS support may lead to some performance improvements
     ```sh
     git clone https://github.com/CNugteren/CLBlast.git
     cd CLBlast
-    cmake . -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
     cmake --build build
     cmake --install build --prefix /some/path
     ```
@@ -614,7 +614,7 @@ Building the program with BLAS support may lead to some performance improvements
     ```
   - CMake (Unix):
     ```sh
-    cmake . -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
+    cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
     cmake --build build
     ```
   - CMake (Windows):
@@ -622,7 +622,7 @@ Building the program with BLAS support may lead to some performance improvements
     set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
     git clone https://github.com/ggerganov/llama.cpp
     cd llama.cpp
-    cmake . -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
     cmake --build build --config Release
     cmake --install build --prefix C:/LlamaCPP
     ```
@@ -680,7 +680,7 @@ Building the program with BLAS support may lead to some performance improvements
   Then, build llama.cpp using the cmake command below:
   ```bash
-  cmake . -B build -DLLAMA_VULKAN=1
+  cmake -B build -DLLAMA_VULKAN=1
   cmake --build build

   # Test the output binary (with "-ngl 33" to offload all layers to GPU)
   ./bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
diff --git a/examples/main-cmake-pkg/README.md b/examples/main-cmake-pkg/README.md
index ae2091afc..edf20d8db 100644
--- a/examples/main-cmake-pkg/README.md
+++ b/examples/main-cmake-pkg/README.md
@@ -17,7 +17,7 @@ In this case, CLBlast was already installed so the CMake package is referenced i
 ```cmd
 git clone https://github.com/ggerganov/llama.cpp
 cd llama.cpp
-cmake . -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=C:/CLBlast/lib/cmake/CLBlast -G "Visual Studio 17 2022" -A x64
 cmake --build build --config Release
 cmake --install build --prefix C:/LlamaCPP
 ```
@@ -27,7 +27,7 @@ cmake --install build --prefix C:/LlamaCPP

 ```cmd
 cd ..\examples\main-cmake-pkg
-cmake . -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
+cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="C:/CLBlast/lib/cmake/CLBlast;C:/LlamaCPP/lib/cmake/Llama" -G "Visual Studio 17 2022" -A x64
 cmake --build build --config Release
 cmake --install build --prefix C:/MyLlamaApp
 ```
diff --git a/examples/server/README.md b/examples/server/README.md
index d74b4f899..0e004c6a4 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -80,7 +80,7 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 - Using `CMake`:

   ```bash
-  cmake . -B build
+  cmake -B build
   cmake --build build -t server
   ```

@@ -102,7 +102,7 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 - Using `CMake`:

   ```bash
-  cmake . -B build -DLLAMA_SERVER_SSL=ON
+  cmake -B build -DLLAMA_SERVER_SSL=ON
   cmake --build build -t server
   ```