diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index fd3deae00..b61ba0454 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -1,5 +1,25 @@ #pragma once +#include "ggml-impl.h" + +#if defined(_MSC_VER) || defined(__MINGW32__) +#include // using malloc.h with MSC/MINGW +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) +#include +#endif + +#include + +#ifdef GGML_USE_CPU_HBM +#include +#endif + +#if defined(__APPLE__) +#include +#include +#include +#endif + // ggml-backend internal header #include "ggml-backend.h" @@ -222,6 +242,75 @@ extern "C" { // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function // typedef ggml_backend_register_t * (*ggml_backend_init)(void); + + // + // Memory allocation + // + +#if defined(_MSC_VER) || defined(__MINGW32__) +#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) +#define GGML_ALIGNED_FREE(ptr, size) _aligned_free(ptr) +#else +inline static void * ggml_aligned_malloc(size_t size) { + if (size == 0) { + GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); + return NULL; + } + void * aligned_memory = NULL; +#ifdef GGML_USE_CPU_HBM + int result = hbw_posix_memalign(&aligned_memory, 16, size); +#elif TARGET_OS_OSX + kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); + int result = EFAULT; + switch (alloc_status) { + case KERN_SUCCESS: + result = 0; + break; + + case KERN_INVALID_ADDRESS: + result = EINVAL; + break; + + case KERN_NO_SPACE: + result = ENOMEM; + break; + + default: + result = EFAULT; + break; + } +#elif GGML_USE_METAL + int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); +#else + int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); +#endif + if (result != 0) { + // Handle allocation failure + const char *error_desc = "unknown allocation error"; + switch (result) { + case EINVAL: + error_desc = "invalid alignment value"; + break; + case ENOMEM: + error_desc = "insufficient memory"; + break; + } + GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); + GGML_ABORT("fatal error"); + return NULL; + } + return aligned_memory; +} +#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) +#ifdef GGML_USE_CPU_HBM +#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) hbw_free(ptr) +#elif TARGET_OS_OSX +#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size) +#else +#define GGML_ALIGNED_FREE(ptr, size) free(ptr) +#endif +#endif + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index 263b3e75d..ad2512c53 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -24,8 +24,6 @@ #ifdef __APPLE__ #include #include -#include -#include #endif @@ -704,7 +702,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) { } static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { - free(buffer->context); + GGML_ALIGNED_FREE(buffer->context, buffer->size); } static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { @@ -772,21 +770,12 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { -#if defined(GGML_USE_METAL) || defined(TARGET_OS_OSX) - void * data = NULL; - int result = posix_memalign(&data, sysconf(_SC_PAGESIZE), size); - if (result != 0) { - GGML_LOG_ERROR("%s: failed to allocate buffer using posix_memalign with size %zu\n", __func__, size); - return NULL; - } -#else - size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned - void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h) + void * data = GGML_ALIGNED_MALLOC(size); + if (data == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size); return NULL; } -#endif return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size); } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 3f01092d9..48437b1e0 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3,17 +3,12 @@ #include "ggml-backend.h" #include "ggml-impl.h" +#include "ggml-backend-impl.h" #include "ggml-cpu-impl.h" #include "ggml-quants.h" #include "ggml.h" #include "ggml-aarch64.h" -#if defined(_MSC_VER) || defined(__MINGW32__) -#include // using malloc.h with MSC/MINGW -#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) -#include -#endif - #include #include #include @@ -35,10 +30,6 @@ #include #endif -#ifdef GGML_USE_METAL -#include -#endif - #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8) #undef GGML_USE_LLAMAFILE #endif @@ -184,10 +175,6 @@ typedef void * thread_ret_t; typedef pthread_t ggml_thread_t; -#ifdef GGML_USE_CPU_HBM -#include -#endif - #if defined(__APPLE__) #include #endif @@ -386,47 +373,6 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi //#define GGML_SOFT_MAX_ACCELERATE #endif -#if defined(_MSC_VER) || defined(__MINGW32__) -#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) -#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr) -#else -inline static void * ggml_aligned_malloc(size_t size) { - if (size == 0) { - GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); - return NULL; - } - void * aligned_memory = NULL; -#ifdef GGML_USE_CPU_HBM - int result = hbw_posix_memalign(&aligned_memory, 16, size); -#elif GGML_USE_METAL - int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); -#else - int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); -#endif - if (result != 0) { - // Handle allocation failure - const char *error_desc = "unknown allocation error"; - switch (result) { - case EINVAL: - error_desc = "invalid alignment value"; - break; - case ENOMEM: - error_desc = "insufficient memory"; - break; - } - GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); - GGML_ABORT("fatal error"); - return NULL; - } - return aligned_memory; -} -#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) -#ifdef GGML_USE_CPU_HBM -#define GGML_ALIGNED_FREE(ptr) if(NULL != ptr) hbw_free(ptr) -#else -#define GGML_ALIGNED_FREE(ptr) free(ptr) -#endif -#endif inline static void * ggml_malloc(size_t size) { if (size == 0) { @@ -3909,7 +3855,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ggml_used_mem(ctx)); if (ctx->mem_buffer_owned) { - GGML_ALIGNED_FREE(ctx->mem_buffer); + GGML_ALIGNED_FREE(ctx->mem_buffer, ctx->mem_size); } found = true; @@ -19630,8 +19576,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) { ggml_cond_destroy(&threadpool->cond); #endif // GGML_USE_OPENMP - GGML_ALIGNED_FREE(threadpool->workers); - GGML_ALIGNED_FREE(threadpool); + const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads; + GGML_ALIGNED_FREE(threadpool->workers, workers_size); + GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool)); } #ifndef GGML_USE_OPENMP