diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile
index b42b33077..274b91b71 100644
--- a/.devops/main-intel.Dockerfile
+++ b/.devops/main-intel.Dockerfile
@@ -15,7 +15,7 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
     export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
     cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
-    cmake --build build --target main
+    cmake --build build --config Release --target main
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/main-vulkan.Dockerfile b/.devops/main-vulkan.Dockerfile
index 51016048f..6c2b2ed5b 100644
--- a/.devops/main-vulkan.Dockerfile
+++ b/.devops/main-vulkan.Dockerfile
@@ -15,7 +15,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 WORKDIR /app
 COPY . .
 RUN cmake -B build -DLLAMA_VULKAN=1 && \
-    cmake --build build --target main
+    cmake --build build --config Release --target main
 
 # Clean up
 WORKDIR /
diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile
index 17bb69f2f..a8e451fa9 100644
--- a/.devops/server-intel.Dockerfile
+++ b/.devops/server-intel.Dockerfile
@@ -15,7 +15,7 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
     export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
     cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
-    cmake --build build --target server
+    cmake --build build --config Release --target server
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile
index 03b976895..6e757e171 100644
--- a/.devops/server-vulkan.Dockerfile
+++ b/.devops/server-vulkan.Dockerfile
@@ -19,7 +19,7 @@ RUN apt-get update && \
 WORKDIR /app
 COPY . .
 RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
-    cmake --build build --target server
+    cmake --build build --config Release --target server
 
 # Clean up
 WORKDIR /
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index e91e1764b..3e968d179 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -107,7 +107,7 @@ jobs:
             -DLLAMA_FATAL_WARNINGS=OFF \
             -DLLAMA_ALL_WARNINGS=OFF \
             -DCMAKE_BUILD_TYPE=Release;
-          cmake --build build -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server
 
       - name: Download the dataset
         id: download_dataset
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index dcc57d40b..c4324f2b1 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -100,7 +100,7 @@ jobs:
             -DLLAMA_CURL=ON \
             -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
             -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-          cmake --build build -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server
 
       - name: Tests
@@ -142,7 +142,7 @@ jobs:
         id: cmake_build
         run: |
           cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-          cmake --build build -j ${env:NUMBER_OF_PROCESSORS} --target server
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
       - name: Python setup
         id: setup_python
diff --git a/README-sycl.md b/README-sycl.md
index 28584c449..cfa248a95 100644
--- a/README-sycl.md
+++ b/README-sycl.md
@@ -186,7 +186,7 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
 git clone https://github.com/oneapi-src/oneMKL
 cd oneMKL
 cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-cmake --build buildWithCublas
+cmake --build buildWithCublas --config Release
 ```
 
@@ -234,7 +234,7 @@ cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 
 # build all binary
-cmake --build build -j -v
+cmake --build build --config Release -j -v
 ```
 
 #### Nvidia GPU
@@ -254,7 +254,7 @@ cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx
 cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 
 # build all binary
-cmake --build build -j -v
+cmake --build build --config Release -j -v
 ```
 
@@ -417,7 +417,7 @@ cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMA
 # Option 2: Or FP16
 cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
 
-cmake --build build -j
+cmake --build build --config Release -j
 ```
 
 Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:
diff --git a/README.md b/README.md
index f901ed24d..0f47764cf 100644
--- a/README.md
+++ b/README.md
@@ -307,6 +307,8 @@ In order to build llama.cpp you have three different options.
     make
     ```
 
+    **Note**: for `Debug` builds, run `make LLAMA_DEBUG=1`
+
 - On Windows:
 
   1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
@@ -321,10 +323,26 @@ In order to build llama.cpp you have three different options.
 - Using `CMake`:
 
     ```bash
-    cmake -B build  # Note: add -DCMAKE_BUILD_TYPE=Debug here for debug builds
-    cmake --build build
+    cmake -B build
+    cmake --build build --config Release
     ```
 
+    **Note**: for `Debug` builds, there are two cases:
+
+    - General case (esp. for default Makefile or Ninja generators):
+
+      ```bash
+      cmake -B build -DCMAKE_BUILD_TYPE=Debug
+      cmake --build build
+      ```
+
+    - Special case for multi-config generators (`-G` param set to Visual Studio, XCode...; single-config generators simply ignore `--config`):
+
+      ```bash
+      cmake -B build -G "Xcode"
+      cmake --build build --config Debug
+      ```
+
 - Using `Zig` (version 0.11 or later):
 
     Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
@@ -509,7 +527,7 @@ Building the program with BLAS support may lead to some performance improvements
   ```bash
   CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
       cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
-      && cmake --build build -- -j 16
+      && cmake --build build --config Release -- -j 16
   ```
   On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`. However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
diff --git a/examples/server/README.md b/examples/server/README.md
index 0e004c6a4..b96a4444a 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -81,7 +81,7 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 
     ```bash
     cmake -B build
-    cmake --build build -t server
+    cmake --build build --config Release -t server
     ```
 
     Binary is at `./build/bin/server`
@@ -103,7 +103,7 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 
     ```bash
     cmake -B build -DLLAMA_SERVER_SSL=ON
-    cmake --build build -t server
+    cmake --build build --config Release -t server
     ```
 
 ## Quick Start