From 4a4a3420de6f7a09511b33195e34ad181c1d0ac2 Mon Sep 17 00:00:00 2001
From: simonteozw
Date: Sun, 18 Aug 2024 23:47:24 +0800
Subject: [PATCH] Add GGML_USE_BLAS flag to llama.cpp and update BLAS
 documentation

---
 docs/build.md | 8 ++++++++
 src/llama.cpp | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/build.md b/docs/build.md
index 152d46d6f..cdfa5ef9c 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -125,6 +125,14 @@ This provides BLAS acceleration using only the CPU. Make sure to have OpenBLAS i
     make GGML_OPENBLAS=1
     ```
 
+  - On Mac (with Intel GPU):
+    ```bash
+    brew install openblas
+    export PKG_CONFIG_PATH=
+    make GGML_OPENBLAS=1 GGML_NO_METAL=1
+    ```
+
+
   - Using `CMake` on Linux:
 
     ```bash
diff --git a/src/llama.cpp b/src/llama.cpp
index aeea54cff..606563d98 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17424,7 +17424,7 @@ bool llama_supports_mlock(void) {
 
 bool llama_supports_gpu_offload(void) {
 #if defined(GGML_USE_CUDA) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
-    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
+    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC) || defined(GGML_USE_BLAS)
     // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
     return true;
 #else
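
With GGML_USE_BLAS added to the check above, a build compiled only with BLAS support would also report GPU offload support. A minimal caller sketch (not part of the patch) of how `llama_supports_gpu_offload()` is typically consumed, assuming the public `llama.h` API (`llama_model_default_params()` and its `n_gpu_layers` field); the layer count of 99 is an arbitrary illustrative value:

```c
// Hypothetical caller sketch: choose how many model layers to request for
// offload based on whether this build reports GPU offload support. After
// this patch, GGML_USE_BLAS builds also return true here.
#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_model_params mparams = llama_model_default_params();

    // Request full offload only when the build claims offload support.
    mparams.n_gpu_layers = llama_supports_gpu_offload() ? 99 : 0;

    printf("gpu offload supported: %s (n_gpu_layers = %d)\n",
           llama_supports_gpu_offload() ? "yes" : "no",
           (int) mparams.n_gpu_layers);
    return 0;
}
```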