diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index 06f962ee6..fd3deae00 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -1,25 +1,5 @@ #pragma once -#include "ggml-impl.h" - -#if defined(_MSC_VER) || defined(__MINGW32__) -#include // using malloc.h with MSC/MINGW -#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) -#include -#endif - -#include - -#ifdef GGML_USE_CPU_HBM -#include -#endif - -#if defined(__APPLE__) -#include -#include -#include -#endif - // ggml-backend internal header #include "ggml-backend.h" @@ -242,72 +222,6 @@ extern "C" { // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function // typedef ggml_backend_register_t * (*ggml_backend_init)(void); - - // - // Memory allocation - // - -#if defined(_MSC_VER) || defined(__MINGW32__) -#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) -#define GGML_ALIGNED_FREE(ptr, size) _aligned_free(ptr) -#else -inline static void * ggml_aligned_malloc(size_t size) { - if (size == 0) { - GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); - return NULL; - } - void * aligned_memory = NULL; -#ifdef GGML_USE_CPU_HBM - int result = hbw_posix_memalign(&aligned_memory, 16, size); -#elif TARGET_OS_OSX - kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); - int result = EFAULT; - switch (alloc_status) { - case KERN_SUCCESS: - result = 0; - break; - case KERN_INVALID_ADDRESS: - result = EINVAL; - break; - case KERN_NO_SPACE: - result = ENOMEM; - break; - default: - result = EFAULT; - break; - } -#elif GGML_USE_METAL - int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); -#else - int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); -#endif - if (result != 0) { - // Handle allocation failure - const char *error_desc = "unknown allocation error"; - switch (result) { - case EINVAL: - error_desc = "invalid alignment value"; - break; - case ENOMEM: - error_desc = "insufficient memory"; - break; - } - GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); - GGML_ABORT("fatal error"); - return NULL; - } - return aligned_memory; -} -#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) -#ifdef GGML_USE_CPU_HBM -#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) hbw_free(ptr) -#elif TARGET_OS_OSX -#define GGML_ALIGNED_FREE(ptr, size) if(NULL != ptr) vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size) -#else -#define GGML_ALIGNED_FREE(ptr, size) free(ptr) -#endif -#endif - #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index ad2512c53..378bfd68a 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -702,7 +702,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) { } static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { - GGML_ALIGNED_FREE(buffer->context, buffer->size); + ggml_aligned_free(buffer->context, buffer->size); } static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { @@ -770,7 +770,7 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty } static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { - void * data = GGML_ALIGNED_MALLOC(size); + void * data = ggml_aligned_malloc(size); if (data == NULL) { GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size); diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index d3f4bad8c..c88ae6069 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -196,6 +196,11 @@ struct ggml_cgraph { struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); +// Memory allocation + +void * ggml_aligned_malloc(size_t size); +void ggml_aligned_free(void * ptr, size_t size); + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index a2a82fd5c..ce041db91 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3,12 +3,17 @@ #include "ggml-backend.h" #include "ggml-impl.h" -#include "ggml-backend-impl.h" #include "ggml-cpu-impl.h" #include "ggml-quants.h" #include "ggml.h" #include "ggml-aarch64.h" +#if defined(_MSC_VER) || defined(__MINGW32__) +#include // using malloc.h with MSC/MINGW +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) +#include +#endif + #include #include #include @@ -175,7 +180,13 @@ typedef void * thread_ret_t; typedef pthread_t ggml_thread_t; +#ifdef GGML_USE_CPU_HBM +#include +#endif + #if defined(__APPLE__) +#include +#include #include #endif @@ -374,6 +385,75 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi #endif +void * ggml_aligned_malloc(size_t size) { +#if defined(_MSC_VER) || defined(__MINGW32__) + return _aligned_malloc(size, GGML_MEM_ALIGN); +#else + if (size == 0) { + GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); + return NULL; + } + void * aligned_memory = NULL; +#ifdef GGML_USE_CPU_HBM + int result = hbw_posix_memalign(&aligned_memory, 16, size); +#elif TARGET_OS_OSX + kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); + int result = EFAULT; + switch (alloc_status) { + case KERN_SUCCESS: + result = 0; + break; + case KERN_INVALID_ADDRESS: + result = EINVAL; + break; + case KERN_NO_SPACE: + result = ENOMEM; + break; + default: + result = EFAULT; + break; + } +#elif GGML_USE_METAL + int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size); +#else + int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); +#endif + if (result != 0) { + // Handle allocation failure + const char *error_desc = "unknown allocation error"; + switch (result) { + case EINVAL: + error_desc = "invalid alignment value"; + break; + case ENOMEM: + error_desc = "insufficient memory"; + break; + } + GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); + GGML_ABORT("fatal error"); + return NULL; + } + return aligned_memory; +#endif +} + +void ggml_aligned_free(void * ptr, size_t size) { +#if defined(_MSC_VER) || defined(__MINGW32__) + _aligned_free(ptr); +#elif GGML_USE_CPU_HBM + if(ptr != NULL) { + hbw_free(ptr); + } +#elif TARGET_OS_OSX + if(ptr != NULL) { + vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size); + } +#else + free(ptr); +#endif +} + + inline static void * ggml_malloc(size_t size) { if (size == 0) { GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n"); @@ -3815,7 +3895,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { *ctx = (struct ggml_context) { /*.mem_size =*/ mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size), + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size), /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, /*.no_alloc =*/ params.no_alloc, /*.no_alloc_save =*/ params.no_alloc, @@ -3855,7 +3935,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ggml_used_mem(ctx)); if (ctx->mem_buffer_owned) { - GGML_ALIGNED_FREE(ctx->mem_buffer, ctx->mem_size); + ggml_aligned_free(ctx->mem_buffer, ctx->mem_size); } found = true; @@ -19577,8 +19657,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) { ggml_cond_destroy(&threadpool->cond); #endif // GGML_USE_OPENMP - GGML_ALIGNED_FREE(threadpool->workers, sizeof(struct ggml_compute_state) * n_threads); - GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool)); + const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads; + ggml_aligned_free(threadpool->workers, workers_size); + ggml_aligned_free(threadpool, sizeof(struct ggml_threadpool)); } #ifndef GGML_USE_OPENMP @@ -20010,7 +20091,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl( struct ggml_cplan * cplan) { struct ggml_threadpool * threadpool = - GGML_ALIGNED_MALLOC(sizeof(struct ggml_threadpool)); + ggml_aligned_malloc(sizeof(struct ggml_threadpool)); { threadpool->cgraph = cgraph; threadpool->cplan = cplan; @@ -20031,7 +20112,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl( // Allocate and init workers state const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads; - struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size); + struct ggml_compute_state * workers = ggml_aligned_malloc(workers_size); memset(workers, 0, workers_size); for (int j = 0; j < tpp->n_threads; j++) {