From 4a4a3420de6f7a09511b33195e34ad181c1d0ac2 Mon Sep 17 00:00:00 2001
From: simonteozw
Date: Sun, 18 Aug 2024 23:47:24 +0800
Subject: [PATCH] Add GGML_USE_BLAS flag to llama.cpp and update BLAS
 documentation

---
 docs/build.md | 8 ++++++++
 src/llama.cpp | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/build.md b/docs/build.md
index 152d46d6f..cdfa5ef9c 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -125,6 +125,14 @@ This provides BLAS acceleration using only the CPU. Make sure to have OpenBLAS i
     make GGML_OPENBLAS=1
     ```
 
+  - On Mac (with Intel GPU):
+    ```bash
+    brew install openblas
+    export PKG_CONFIG_PATH=
+    make GGML_OPENBLAS=1 GGML_NO_METAL=1
+    ```
+
+
   - Using `CMake` on Linux:
 
     ```bash
diff --git a/src/llama.cpp b/src/llama.cpp
index aeea54cff..606563d98 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17424,7 +17424,7 @@ bool llama_supports_mlock(void) {
 
 bool llama_supports_gpu_offload(void) {
 #if defined(GGML_USE_CUDA) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
-    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC)
+    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC) || defined(GGML_USE_BLAS)
     // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
     return true;
 #else
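
With GGML_USE_BLAS added to the check above, a build compiled only with BLAS support would also report GPU offload support. A minimal caller sketch (not part of the patch) of how `llama_supports_gpu_offload()` is typically consumed, assuming the public `llama.h` API (`llama_model_default_params()` and its `n_gpu_layers` field); the layer count of 99 is an arbitrary illustrative value:

```c
// Hypothetical caller sketch: choose how many model layers to request for
// offload based on whether this build reports GPU offload support. After
// this patch, GGML_USE_BLAS builds also return true here.
#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_model_params mparams = llama_model_default_params();

    // Request full offload only when the build claims offload support.
    mparams.n_gpu_layers = llama_supports_gpu_offload() ? 99 : 0;

    printf("gpu offload supported: %s (n_gpu_layers = %d)\n",
           llama_supports_gpu_offload() ? "yes" : "no",
           (int) mparams.n_gpu_layers);
    return 0;
}
```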