diff --git a/.devops/main-intel.Dockerfile b/.devops/main-intel.Dockerfile
index b42b33077..274b91b71 100644
--- a/.devops/main-intel.Dockerfile
+++ b/.devops/main-intel.Dockerfile
@@ -15,7 +15,7 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
     export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
     cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
-    cmake --build build --target main
+    cmake --build build --config Release --target main
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/main-vulkan.Dockerfile b/.devops/main-vulkan.Dockerfile
index 51016048f..6c2b2ed5b 100644
--- a/.devops/main-vulkan.Dockerfile
+++ b/.devops/main-vulkan.Dockerfile
@@ -15,7 +15,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 WORKDIR /app
 COPY . .
 RUN cmake -B build -DLLAMA_VULKAN=1 && \
-    cmake --build build --target main
+    cmake --build build --config Release --target main
 
 # Clean up
 WORKDIR /
diff --git a/.devops/server-intel.Dockerfile b/.devops/server-intel.Dockerfile
index 17bb69f2f..a8e451fa9 100644
--- a/.devops/server-intel.Dockerfile
+++ b/.devops/server-intel.Dockerfile
@@ -15,7 +15,7 @@ RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
     export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
     cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
-    cmake --build build --target server
+    cmake --build build --config Release --target server
 
 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
diff --git a/.devops/server-vulkan.Dockerfile b/.devops/server-vulkan.Dockerfile
index 03b976895..6e757e171 100644
--- a/.devops/server-vulkan.Dockerfile
+++ b/.devops/server-vulkan.Dockerfile
@@ -19,7 +19,7 @@ RUN apt-get update && \
 WORKDIR /app
 COPY . .
 RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
-    cmake --build build --target server
+    cmake --build build --config Release --target server
 
 # Clean up
 WORKDIR /
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index e91e1764b..3e968d179 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -107,7 +107,7 @@ jobs:
             -DLLAMA_FATAL_WARNINGS=OFF \
             -DLLAMA_ALL_WARNINGS=OFF \
             -DCMAKE_BUILD_TYPE=Release;
-          cmake --build build -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server
 
       - name: Download the dataset
         id: download_dataset
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index dcc57d40b..c4324f2b1 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -100,7 +100,7 @@ jobs:
             -DLLAMA_CURL=ON \
             -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
             -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-          cmake --build build -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server
 
       - name: Tests
@@ -142,7 +142,7 @@ jobs:
         id: cmake_build
         run: |
           cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-          cmake --build build -j ${env:NUMBER_OF_PROCESSORS} --target server
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
       - name: Python setup
         id: setup_python
diff --git a/README-sycl.md b/README-sycl.md
index 28584c449..cfa248a95 100644
--- a/README-sycl.md
+++ b/README-sycl.md
@@ -186,7 +186,7 @@ Upon a successful installation, SYCL is enabled for the available intel devices,
 git clone https://github.com/oneapi-src/oneMKL
 cd oneMKL
 cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas
-cmake --build buildWithCublas
+cmake --build buildWithCublas --config Release
 ```
 
@@ -234,7 +234,7 @@ cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 
 # build all binary
-cmake --build build -j -v
+cmake --build build --config Release -j -v
 ```
 
 #### Nvidia GPU
@@ -254,7 +254,7 @@ cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx
 cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 
 # build all binary
-cmake --build build -j -v
+cmake --build build --config Release -j -v
 ```
 
@@ -417,7 +417,7 @@ cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMA
 # Option 2: Or FP16
 cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
 
-cmake --build build -j
+cmake --build build --config Release -j
 ```
 
 Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:
diff --git a/README.md b/README.md
index f901ed24d..0f47764cf 100644
--- a/README.md
+++ b/README.md
@@ -307,6 +307,8 @@ In order to build llama.cpp you have three different options.
     make
     ```
 
+    **Note**: for `Debug` builds, run `make LLAMA_DEBUG=1`
+
 - On Windows:
 
   1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
@@ -321,10 +323,26 @@ In order to build llama.cpp you have three different options.
 - Using `CMake`:
 
     ```bash
-    cmake -B build  # Note: add -DCMAKE_BUILD_TYPE=Debug here for debug builds
-    cmake --build build
+    cmake -B build
+    cmake --build build --config Release
     ```
 
+    **Note**: for `Debug` builds, there are two cases:
+
+    - General case (esp. for default Makefile or Ninja generators):
+
+      ```bash
+      cmake -B build -DCMAKE_BUILD_TYPE=Debug
+      cmake --build build
+      ```
+
+    - Special case for multi-config generators (`-G` param set to Visual Studio, XCode...; single-config generators simply ignore `--config`):
+
+      ```bash
+      cmake -B build -G "Xcode"
+      cmake --build build --config Debug
+      ```
+
 - Using `Zig` (version 0.11 or later):
 
     Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C,
@@ -509,7 +527,7 @@ Building the program with BLAS support may lead to some performance improvements
   ```bash
   CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
       cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
-      && cmake --build build -- -j 16
+      && cmake --build build --config Release -- -j 16
   ```
   On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`. However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
diff --git a/examples/server/README.md b/examples/server/README.md
index 0e004c6a4..b96a4444a 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -81,7 +81,7 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 
     ```bash
     cmake -B build
-    cmake --build build -t server
+    cmake --build build --config Release -t server
     ```
 
     Binary is at `./build/bin/server`
@@ -103,7 +103,7 @@ page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/
 
     ```bash
     cmake -B build -DLLAMA_SERVER_SSL=ON
-    cmake --build build -t server
+    cmake --build build --config Release -t server
     ```
 
 ## Quick Start