fix apple build
commit 0425305d32
parent 2bfdb7fe4e

6 changed files with 22 additions and 14 deletions
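As the hunks below read, the fix makes macOS treat Accelerate as a regular BLAS backend: CMake turns LLAMA_BLAS on by default on Apple with vendor "Apple", the Makefile defines GGML_USE_BLAS alongside GGML_USE_ACCELERATE on Darwin, and the `#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)` guards collapse to plain `#ifdef GGML_USE_BLAS`. In ggml-blas.cpp, the std::async task vector moves from a local variable into the backend context.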
CMakeLists.txt
@@ -39,8 +39,12 @@ endif()
 if (APPLE)
     set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_BLAS_DEFAULT ON)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Apple")
 else()
     set(LLAMA_METAL_DEFAULT OFF)
+    set(LLAMA_BLAS_DEFAULT OFF)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Generic")
 endif()

 set(LLAMA_LLAMAFILE_DEFAULT ON)
@@ -91,8 +95,9 @@ endif()
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
-set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
+option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT})
+set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING
+                      "llama: BLAS library vendor")
 option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
 option(LLAMA_CUDA "llama: use CUDA" OFF)
 option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
@@ -321,7 +326,7 @@ if (LLAMA_BLAS)
     if (BLAS_FOUND)
         message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

-        if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
+        if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple"))
            # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
            # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
            find_package(PkgConfig REQUIRED)
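(Net effect of the CMake changes, as far as these hunks show: a plain `cmake -B build` on macOS now enables the BLAS backend against Accelerate, since FindBLAS maps the "Apple" vendor to the Accelerate framework, while other platforms keep LLAMA_BLAS off unless it is requested explicitly. The added vendor check skips the pkg-config fallback for BLAS_INCLUDE_DIRS on Apple, presumably because Accelerate ships no .pc file.)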
Makefile
@@ -404,7 +404,7 @@ ifndef LLAMA_NO_ACCELERATE
 # Mac OS - include Accelerate framework.
 # `-framework Accelerate` works both with Apple Silicon and Mac Intel
 ifeq ($(UNAME_S),Darwin)
-    MK_CPPFLAGS += -DGGML_USE_ACCELERATE
+    MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
     MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
     MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
     MK_LDFLAGS  += -framework Accelerate
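(Defining -DGGML_USE_BLAS next to -DGGML_USE_ACCELERATE on Darwin is what allows the source-level guards later in this commit to be simplified: every Accelerate build is now also a GGML_USE_BLAS build.)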
ggml-alloc.c
@@ -706,7 +706,6 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
         struct node_alloc * node_alloc = &galloc->node_allocs[i];
-        //node_alloc->buffer_id = get_node_buffer_id(node_buffer_ids, i);
         if (node->view_src || node->data) {
             node_alloc->dst.buffer_id = -1;
             node_alloc->dst.offset = SIZE_MAX;
|
@ -16,6 +16,7 @@ struct ggml_backend_blas_context {
|
|||
int n_threads;
|
||||
char * work_data;
|
||||
size_t work_size;
|
||||
std::vector<std::future<void>> tasks;
|
||||
};
|
||||
|
||||
// helper function to determine if it is better to use BLAS or not
|
||||
|
@@ -33,7 +34,7 @@ static bool ggml_backend_blas_use_blas(const struct ggml_tensor * dst) {
     if (ggml_is_contiguous(src0) &&
         ggml_is_contiguous(src1) &&
         src1->type == GGML_TYPE_F32 &&
-        ((src0->type == GGML_TYPE_F32) || (ne0 >= 32 && ne1 >= 32 && ne10 >= 32))) {
+        (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {

        /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
        return true;
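(The condition above keeps BLAS for large matrices only. A minimal standalone sketch of the same threshold rule, with hypothetical names not taken from ggml: the O(N^3) multiply has to be big enough to amortize per-call overhead such as converting src0 to float, otherwise the custom kernels win.)

    #include <cstdint>

    // Hypothetical restatement of the heuristic in the hunk above:
    // use BLAS only when every involved dimension is at least 32.
    static bool worth_using_blas(int64_t ne0, int64_t ne1, int64_t ne10) {
        const int64_t min_dim = 32;
        return ne0 >= min_dim && ne1 >= min_dim && ne10 >= min_dim;
    }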
@@ -83,7 +84,6 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg

     // convert src0 to float
     if (type != GGML_TYPE_F32) {
-        std::vector<std::future<void>> tasks;
         ggml_to_float_t const to_float = type_traits.to_float;

         for (int64_t i03 = 0; i03 < ne03; i03++) {
@@ -98,7 +98,7 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
                 }
 #else
                 for (int i = 0; i < ctx->n_threads; i++) {
-                    tasks.push_back(std::async(std::launch::async, [=]() {
+                    ctx->tasks.push_back(std::async(std::launch::async, [=]() {
                         const int64_t start = i*ne01/ctx->n_threads;
                         const int64_t end   = (i + 1)*ne01/ctx->n_threads;
                         for (int64_t i01 = start; i01 < end; i01++) {
@@ -109,10 +109,14 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
 #endif
             }
         }

-        for (auto & task : tasks) {
+#ifndef GGML_USE_OPENMP
+        // wait for all tasks to finish
+        for (auto & task : ctx->tasks) {
             task.get();
         }
+        ctx->tasks.clear();
+#endif
     }

     for (int64_t i13 = 0; i13 < ne13; i13++) {
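(The three ggml-blas.cpp hunks above move the std::future vector from a local in ggml_backend_blas_mul_mat into ggml_backend_blas_context, then join and clear it under #ifndef GGML_USE_OPENMP. A minimal self-contained sketch of that fan-out/join pattern, with illustrative names that are not ggml's:)

    #include <cstdint>
    #include <future>
    #include <vector>

    struct worker_ctx {
        int n_threads;
        std::vector<std::future<void>> tasks; // persists across calls, like ctx->tasks
    };

    // Split [0, n_rows) into n_threads contiguous ranges, run `body` on each
    // range asynchronously, then join and reset the task list so the context
    // can be reused by the next call.
    template <typename F>
    static void parallel_rows(worker_ctx & ctx, int64_t n_rows, F body) {
        for (int i = 0; i < ctx.n_threads; i++) {
            const int64_t start = (i + 0)*n_rows/ctx.n_threads;
            const int64_t end   = (i + 1)*n_rows/ctx.n_threads;
            ctx.tasks.push_back(std::async(std::launch::async, [=]() {
                for (int64_t r = start; r < end; r++) {
                    body(r);
                }
            }));
        }
        // wait for all tasks to finish (get() also rethrows worker exceptions)
        for (auto & task : ctx.tasks) {
            task.get();
        }
        ctx.tasks.clear();
    }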
ggml.c
@@ -22645,7 +22645,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }

 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;
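(With GGML_USE_ACCELERATE now implying GGML_USE_BLAS at build time, the separate Accelerate clause in ggml_cpu_has_blas() had become redundant.)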
llama.cpp
@@ -21,7 +21,7 @@
 #  include "ggml-kompute.h"
 #endif

-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
 #  include "ggml-blas.h"
 #endif

@@ -2303,7 +2303,7 @@ struct llama_context {
 #ifdef GGML_USE_METAL
     ggml_backend_t backend_metal = nullptr;
 #endif
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
     ggml_backend_t backend_blas = nullptr;
 #endif
     ggml_backend_t backend_cpu = nullptr;
@@ -12025,7 +12025,7 @@ static void llama_graph_compute(
         ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
         ggml_backend_cpu_set_abort_callback(lctx.backend_cpu, lctx.abort_callback, lctx.abort_callback_data);
     }
-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
     if (lctx.backend_blas != nullptr) {
         ggml_backend_blas_set_n_threads(lctx.backend_blas, n_threads);
     }
@@ -16240,7 +16240,7 @@ struct llama_context * llama_new_context_with_model(
         }
 #endif

-#if defined(GGML_USE_BLAS) || defined(GGML_USE_ACCELERATE)
+#ifdef GGML_USE_BLAS
         ctx->backend_blas = ggml_backend_blas_init();
         if (ctx->backend_blas == nullptr) {
             LLAMA_LOG_WARN("%s: failed to initialize BLAS backend\n", __func__);