fix apple build

Author: slaren
Date:   2024-06-06 19:54:14 +02:00
Parent: 2bfdb7fe4e
Commit: 0425305d32

6 changed files with 22 additions and 14 deletions

CMakeLists.txt

@@ -39,8 +39,12 @@ endif()
 if (APPLE)
     set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_BLAS_DEFAULT ON)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Apple")
 else()
     set(LLAMA_METAL_DEFAULT OFF)
+    set(LLAMA_BLAS_DEFAULT OFF)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Generic")
 endif()
 set(LLAMA_LLAMAFILE_DEFAULT ON)
@@ -91,8 +95,9 @@ endif()
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
-set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
+option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT})
+set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING
+    "llama: BLAS library vendor")
 option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
 option(LLAMA_CUDA "llama: use CUDA" OFF)
 option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
@@ -321,7 +326,7 @@ if (LLAMA_BLAS)
     if (BLAS_FOUND)
         message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
-        if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
+        if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple"))
             # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
             # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
             find_package(PkgConfig REQUIRED)
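
Net effect of these three hunks: on Apple platforms LLAMA_BLAS now defaults to ON with the "Apple" vendor (Accelerate), everywhere else the old OFF/"Generic" behavior is kept, and the PkgConfig include-dir workaround is skipped for the Apple vendor, which needs no BLAS include path. The new defaults stay overridable at configure time; for example (illustrative invocations, only the flag names come from the diff):

    cmake -B build -DLLAMA_BLAS=OFF                # opt back out on macOS
    cmake -B build -DLLAMA_BLAS_VENDOR=OpenBLAS    # pick another vendor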

Makefile

@@ -404,7 +404,7 @@ ifndef LLAMA_NO_ACCELERATE
     # Mac OS - include Accelerate framework.
     # `-framework Accelerate` works both with Apple Silicon and Mac Intel
     ifeq ($(UNAME_S),Darwin)
-        MK_CPPFLAGS += -DGGML_USE_ACCELERATE
+        MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
         MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
         MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
         MK_LDFLAGS  += -framework Accelerate
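
With -DGGML_USE_BLAS added here, an Accelerate build is always also a BLAS-backend build, which is what lets the later hunks drop their GGML_USE_ACCELERATE checks. A small compile-time guard one could add to verify that invariant (my own sketch, not part of the commit):

    // Sketch: after the Makefile change, Accelerate should never be
    // enabled without the generic BLAS backend.
    #if defined(GGML_USE_ACCELERATE) && !defined(GGML_USE_BLAS)
    #error "GGML_USE_ACCELERATE requires GGML_USE_BLAS"
    #endif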

ggml-alloc.c

@@ -706,7 +706,6 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
         struct node_alloc * node_alloc = &galloc->node_allocs[i];
-        //node_alloc->buffer_id = get_node_buffer_id(node_buffer_ids, i);
         if (node->view_src || node->data) {
             node_alloc->dst.buffer_id = -1;
             node_alloc->dst.offset = SIZE_MAX;

ggml-blas.cpp

@@ -16,6 +16,7 @@ struct ggml_backend_blas_context {
     int n_threads;
     char * work_data;
     size_t work_size;
+    std::vector<std::future<void>> tasks;
 };
 // helper function to determine if it is better to use BLAS or not
@@ -33,7 +34,7 @@ static bool ggml_backend_blas_use_blas(const struct ggml_tensor * dst) {
     if (ggml_is_contiguous(src0) &&
         ggml_is_contiguous(src1) &&
         src1->type == GGML_TYPE_F32 &&
-        ((src0->type == GGML_TYPE_F32) || (ne0 >= 32 && ne1 >= 32 && ne10 >= 32))) {
+        (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
         /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
         return true;
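
After this change the backend claims a matrix product only when all of ne0, ne1, and ne10 are at least 32, regardless of src0's type; smaller products stay on the regular CPU kernels, where the cost of dispatching to BLAS would outweigh the gain. A standalone sketch of that size heuristic (function and constant names are mine, only the 32 cutoff is from the diff):

    #include <cstdint>

    // Route an (ne0 x ne1) result with inner dimension ne10 to BLAS only
    // when every dimension is large enough to amortize the call overhead.
    static bool worth_using_blas(int64_t ne0, int64_t ne1, int64_t ne10) {
        const int64_t min_dim = 32; // threshold used by ggml_backend_blas_use_blas
        return ne0 >= min_dim && ne1 >= min_dim && ne10 >= min_dim;
    }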
@@ -83,7 +84,6 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
     // convert src0 to float
     if (type != GGML_TYPE_F32) {
-        std::vector<std::future<void>> tasks;
         ggml_to_float_t const to_float = type_traits.to_float;
         for (int64_t i03 = 0; i03 < ne03; i03++) {
@@ -98,7 +98,7 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
                 }
 #else
                 for (int i = 0; i < ctx->n_threads; i++) {
-                    tasks.push_back(std::async(std::launch::async, [=]() {
+                    ctx->tasks.push_back(std::async(std::launch::async, [=]() {
                         const int64_t start = i*ne01/ctx->n_threads;
                         const int64_t end = (i + 1)*ne01/ctx->n_threads;
                         for (int64_t i01 = start; i01 < end; i01++) {
@@ -109,10 +109,14 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
 #endif
             }
         }
+#ifndef GGML_USE_OPENMP
         // wait for all tasks to finish
-        for (auto & task : tasks) {
+        for (auto & task : ctx->tasks) {
             task.get();
         }
+        ctx->tasks.clear();
+#endif
     }
     for (int64_t i13 = 0; i13 < ne13; i13++) {
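
Moving the tasks vector from a function local into ggml_backend_blas_context changes who owns the in-flight futures: they now live as long as the backend, and each call drains and clears them before returning. A minimal self-contained sketch of that pattern (blas_ctx and run_parallel are my names; only the idea of storing std::future in the context and clearing after the wait comes from the diff):

    #include <cstdint>
    #include <cstdio>
    #include <future>
    #include <vector>

    // Hypothetical stand-in for ggml_backend_blas_context: the context owns
    // the futures, so no local vector is re-created on every call.
    struct blas_ctx {
        int n_threads = 4;
        std::vector<std::future<void>> tasks;
    };

    // Fan a row range out over n_threads, then wait and clear, mirroring the
    // "wait for all tasks to finish" block in the diff above.
    static void run_parallel(blas_ctx * ctx, int64_t ne01) {
        for (int i = 0; i < ctx->n_threads; i++) {
            ctx->tasks.push_back(std::async(std::launch::async, [=]() {
                const int64_t start = i * ne01 / ctx->n_threads;
                const int64_t end   = (i + 1) * ne01 / ctx->n_threads;
                for (int64_t i01 = start; i01 < end; i01++) {
                    // per-row conversion work would go here
                }
            }));
        }
        for (auto & task : ctx->tasks) {
            task.get(); // rethrows any exception from the worker
        }
        ctx->tasks.clear(); // leave the context ready for the next call
    }

    int main() {
        blas_ctx ctx;
        run_parallel(&ctx, 128);
        std::puts("done");
    }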

ggml.c

@@ -22645,7 +22645,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }
 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;
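
GGML_USE_ACCELERATE can be dropped from this check because, after the Makefile change, any Accelerate build also defines GGML_USE_BLAS. Callers can still probe the capability at runtime the same way; a tiny example (assuming ggml.h is on the include path):

    #include <cstdio>
    #include "ggml.h"

    int main() {
        // Returns 1 when a BLAS-capable backend (BLAS/Accelerate, CUDA,
        // Vulkan, SYCL) was compiled in, 0 otherwise.
        std::printf("BLAS support: %s\n", ggml_cpu_has_blas() ? "yes" : "no");
    }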

llama.cpp

@@ -21,7 +21,7 @@
 # include "ggml-kompute.h"
 #endif
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
 # include "ggml-blas.h"
 #endif
@@ -2303,7 +2303,7 @@ struct llama_context {
 #ifdef GGML_USE_METAL
     ggml_backend_t backend_metal = nullptr;
 #endif
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
     ggml_backend_t backend_blas = nullptr;
 #endif
     ggml_backend_t backend_cpu = nullptr;
@@ -12025,7 +12025,7 @@ static void llama_graph_compute(
         ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
         ggml_backend_cpu_set_abort_callback(lctx.backend_cpu, lctx.abort_callback, lctx.abort_callback_data);
     }
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
     if (lctx.backend_blas != nullptr) {
         ggml_backend_blas_set_n_threads(lctx.backend_blas, n_threads);
     }
@@ -16240,7 +16240,7 @@ struct llama_context * llama_new_context_with_model(
         }
 #endif
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
         ctx->backend_blas = ggml_backend_blas_init();
         if (ctx->backend_blas == nullptr) {
             LLAMA_LOG_WARN("%s: failed to initialize BLAS backend\n", __func__);
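
All four guards collapse to a single flag because the build now guarantees that Accelerate implies GGML_USE_BLAS. Note the initialization idiom the last hunk keeps: a failed backend init logs a warning and execution continues on the CPU backend. A compressed sketch of that idiom (types and names here are hypothetical stand-ins, not the real llama.cpp API):

    #include <cstdio>

    struct my_backend { };
    static my_backend * my_blas_init() { return nullptr; } // init may fail

    struct my_context { my_backend * backend_blas = nullptr; };

    static void init_optional_blas(my_context * ctx) {
    #ifdef GGML_USE_BLAS
        ctx->backend_blas = my_blas_init();
        if (ctx->backend_blas == nullptr) {
            // warn and keep going; compute falls back to the CPU backend
            std::fprintf(stderr, "warning: failed to initialize BLAS backend\n");
        }
    #else
        (void) ctx; // BLAS backend not compiled in
    #endif
    }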