Add GGML_USE_BLAS flag to llama.cpp and update BLAS documentation
parent e11bd856d5
commit 4a4a3420de

2 changed files with 9 additions and 1 deletion
@@ -125,6 +125,14 @@ This provides BLAS acceleration using only the CPU. Make sure to have OpenBLAS installed
   make GGML_OPENBLAS=1
   ```
 
+- On Mac (with an Intel CPU):
+  ```bash
+  brew install openblas
+  export PKG_CONFIG_PATH=<openblas_path>
+  make GGML_OPENBLAS=1 GGML_NO_METAL=1
+  ```
+
+
 - Using `CMake` on Linux:
 
   ```bash
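The hunk above ends just as the `CMake` route begins. As a sketch of that route, assuming the upstream `GGML_BLAS`/`GGML_BLAS_VENDOR` CMake options that accompany the `GGML_USE_BLAS` define (verify the option names against your checkout):

```bash
# Sketch only: build llama.cpp with OpenBLAS via CMake on Linux.
# GGML_BLAS / GGML_BLAS_VENDOR are assumed from upstream llama.cpp's
# CMake options; confirm them against the checkout you are building.
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config Release
```

For either route, `pkg-config` must be able to locate OpenBLAS, which is what the `PKG_CONFIG_PATH` export in the Mac instructions arranges.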
@@ -17424,7 +17424,7 @@ bool llama_supports_mlock(void) {
 
 bool llama_supports_gpu_offload(void) {
 #if defined(GGML_USE_CUDA) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
-    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
+    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC) || defined(GGML_USE_BLAS)
     // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
     return true;
 #else
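The effect of this one-line change is visible through the public API: `llama_supports_gpu_offload()` now also returns `true` for BLAS-only builds. A minimal caller-side sketch, assuming only `llama_supports_gpu_offload()` from `llama.h` (the surrounding program is illustrative, not from this commit):

```c
// Sketch: query build-time offload support before requesting GPU layers.
// Only llama_supports_gpu_offload() is the real API from llama.h.
#include "llama.h"
#include <stdio.h>

int main(void) {
    if (llama_supports_gpu_offload()) {
        // True for CUDA/Metal/Vulkan/SYCL/Kompute/RPC builds, and with
        // this commit for GGML_USE_BLAS builds as well.
        printf("offload supported: layers can be placed on the backend\n");
    } else {
        printf("CPU-only build: n_gpu_layers will have no effect\n");
    }
    return 0;
}
```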