feat: move GGML_ALIGNED_MALLOC
to ggml-backend-impl.h
, add support for vm_allocate
on macOS
This commit is contained in:
parent
cec2d4e265
commit
51ccdebc84
3 changed files with 97 additions and 72 deletions
|
@ -1,5 +1,25 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#ifdef GGML_USE_CPU_HBM
|
||||
#include <hbwmalloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <unistd.h>
|
||||
#include <mach/mach.h>
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
// ggml-backend internal header
|
||||
|
||||
#include "ggml-backend.h"
|
||||
|
@ -222,6 +242,75 @@ extern "C" {
|
|||
// TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
|
||||
// typedef ggml_backend_register_t * (*ggml_backend_init)(void);
|
||||
|
||||
|
||||
//
|
||||
// Memory allocation
|
||||
//
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
|
||||
#define GGML_ALIGNED_FREE(ptr, size) _aligned_free(ptr)
|
||||
#else
|
||||
inline static void * ggml_aligned_malloc(size_t size) {
|
||||
if (size == 0) {
|
||||
GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
|
||||
return NULL;
|
||||
}
|
||||
void * aligned_memory = NULL;
|
||||
#ifdef GGML_USE_CPU_HBM
|
||||
int result = hbw_posix_memalign(&aligned_memory, 16, size);
|
||||
#elif TARGET_OS_OSX
|
||||
kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
|
||||
int result = EFAULT;
|
||||
switch (alloc_status) {
|
||||
case KERN_SUCCESS:
|
||||
result = 0;
|
||||
break;
|
||||
|
||||
case KERN_INVALID_ADDRESS:
|
||||
result = EINVAL;
|
||||
break;
|
||||
|
||||
case KERN_NO_SPACE:
|
||||
result = ENOMEM;
|
||||
break;
|
||||
|
||||
default:
|
||||
result = EFAULT;
|
||||
break;
|
||||
}
|
||||
#elif GGML_USE_METAL
|
||||
int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
|
||||
#else
|
||||
int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
|
||||
#endif
|
||||
if (result != 0) {
|
||||
// Handle allocation failure
|
||||
const char *error_desc = "unknown allocation error";
|
||||
switch (result) {
|
||||
case EINVAL:
|
||||
error_desc = "invalid alignment value";
|
||||
break;
|
||||
case ENOMEM:
|
||||
error_desc = "insufficient memory";
|
||||
break;
|
||||
}
|
||||
GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
|
||||
GGML_ABORT("fatal error");
|
||||
return NULL;
|
||||
}
|
||||
return aligned_memory;
|
||||
}
|
||||
#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
|
||||
#ifdef GGML_USE_CPU_HBM
|
||||
#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) hbw_free(ptr)
|
||||
#elif TARGET_OS_OSX
|
||||
#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size)
|
||||
#else
|
||||
#define GGML_ALIGNED_FREE(ptr, size) free(ptr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -24,8 +24,6 @@
|
|||
#ifdef __APPLE__
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <unistd.h>
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -704,7 +702,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|||
}
|
||||
|
||||
static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
free(buffer->context);
|
||||
GGML_ALIGNED_FREE(buffer->context, buffer->size);
|
||||
}
|
||||
|
||||
static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
|
||||
|
@ -772,21 +770,12 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty
|
|||
}
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
||||
#if defined(GGML_USE_METAL) || defined(TARGET_OS_OSX)
|
||||
void * data = NULL;
|
||||
int result = posix_memalign(&data, sysconf(_SC_PAGESIZE), size);
|
||||
if (result != 0) {
|
||||
GGML_LOG_ERROR("%s: failed to allocate buffer using posix_memalign with size %zu\n", __func__, size);
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
|
||||
void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h)
|
||||
void * data = GGML_ALIGNED_MALLOC(size);
|
||||
|
||||
if (data == NULL) {
|
||||
GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size);
|
||||
}
|
||||
|
|
|
@ -3,17 +3,12 @@
|
|||
|
||||
#include "ggml-backend.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-cpu-impl.h"
|
||||
#include "ggml-quants.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-aarch64.h"
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
|
@ -35,10 +30,6 @@
|
|||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_METAL
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
||||
#undef GGML_USE_LLAMAFILE
|
||||
#endif
|
||||
|
@ -184,10 +175,6 @@ typedef void * thread_ret_t;
|
|||
|
||||
typedef pthread_t ggml_thread_t;
|
||||
|
||||
#ifdef GGML_USE_CPU_HBM
|
||||
#include <hbwmalloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
@ -386,47 +373,6 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
|
|||
//#define GGML_SOFT_MAX_ACCELERATE
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
|
||||
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
|
||||
#else
|
||||
inline static void * ggml_aligned_malloc(size_t size) {
|
||||
if (size == 0) {
|
||||
GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
|
||||
return NULL;
|
||||
}
|
||||
void * aligned_memory = NULL;
|
||||
#ifdef GGML_USE_CPU_HBM
|
||||
int result = hbw_posix_memalign(&aligned_memory, 16, size);
|
||||
#elif GGML_USE_METAL
|
||||
int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
|
||||
#else
|
||||
int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
|
||||
#endif
|
||||
if (result != 0) {
|
||||
// Handle allocation failure
|
||||
const char *error_desc = "unknown allocation error";
|
||||
switch (result) {
|
||||
case EINVAL:
|
||||
error_desc = "invalid alignment value";
|
||||
break;
|
||||
case ENOMEM:
|
||||
error_desc = "insufficient memory";
|
||||
break;
|
||||
}
|
||||
GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
|
||||
GGML_ABORT("fatal error");
|
||||
return NULL;
|
||||
}
|
||||
return aligned_memory;
|
||||
}
|
||||
#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
|
||||
#ifdef GGML_USE_CPU_HBM
|
||||
#define GGML_ALIGNED_FREE(ptr) if(NULL != ptr) hbw_free(ptr)
|
||||
#else
|
||||
#define GGML_ALIGNED_FREE(ptr) free(ptr)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
inline static void * ggml_malloc(size_t size) {
|
||||
if (size == 0) {
|
||||
|
@ -3909,7 +3855,7 @@ void ggml_free(struct ggml_context * ctx) {
|
|||
__func__, i, ggml_used_mem(ctx));
|
||||
|
||||
if (ctx->mem_buffer_owned) {
|
||||
GGML_ALIGNED_FREE(ctx->mem_buffer);
|
||||
GGML_ALIGNED_FREE(ctx->mem_buffer, ctx->mem_size);
|
||||
}
|
||||
|
||||
found = true;
|
||||
|
@ -19630,8 +19576,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
|
|||
ggml_cond_destroy(&threadpool->cond);
|
||||
#endif // GGML_USE_OPENMP
|
||||
|
||||
GGML_ALIGNED_FREE(threadpool->workers);
|
||||
GGML_ALIGNED_FREE(threadpool);
|
||||
const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads;
|
||||
GGML_ALIGNED_FREE(threadpool->workers, workers_size);
|
||||
GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool));
|
||||
}
|
||||
|
||||
#ifndef GGML_USE_OPENMP
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue