diff --git a/docs/build.md b/docs/build.md
index 152d46d6f..cdfa5ef9c 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -125,6 +125,14 @@ This provides BLAS acceleration using only the CPU. Make sure to have OpenBLAS installed on your machine.
     make GGML_OPENBLAS=1
     ```
 
+  - On Mac (with Intel GPU):
+    ```bash
+    brew install openblas
+    export PKG_CONFIG_PATH="$(brew --prefix openblas)/lib/pkgconfig"
+    make GGML_OPENBLAS=1 GGML_NO_METAL=1
+    ```
+
+
   - Using `CMake` on Linux:
 
     ```bash
diff --git a/src/llama.cpp b/src/llama.cpp
index aeea54cff..606563d98 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17424,7 +17424,7 @@ bool llama_supports_mlock(void) {
 
 bool llama_supports_gpu_offload(void) {
 #if defined(GGML_USE_CUDA) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
-    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
+    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC) || defined(GGML_USE_BLAS)
     // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
     return true;
 #else
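
Since the `llama.cpp` hunk widens `llama_supports_gpu_offload()` to return true on `GGML_USE_BLAS` builds, the callers it affects are those that gate layer placement on that capability check. A minimal sketch of that call-site pattern is below; it is not part of the patch, the model path and the `999` layer count are placeholders, and it only assumes the public `llama.h` entry points (`llama_backend_init`, `llama_model_default_params`, `llama_load_model_from_file`, `llama_supports_gpu_offload`).

```cpp
// Hypothetical caller, not part of this patch: requests layer offload
// only when the build reports support for it.
#include <cstdio>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();

    // Before this patch, a CPU-only GGML_USE_BLAS build returned false here,
    // so this caller left every layer on the CPU; with the patch, the same
    // build reports true and requests offload for all layers.
    mparams.n_gpu_layers = llama_supports_gpu_offload() ? 999 : 0;

    llama_model * model = llama_load_model_from_file(argv[1], mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model '%s'\n", argv[1]);
        llama_backend_free();
        return 1;
    }

    printf("gpu offload supported: %s (n_gpu_layers=%d)\n",
           llama_supports_gpu_offload() ? "yes" : "no", mparams.n_gpu_layers);

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```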