diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
index fd3deae00..b61ba0454 100644
--- a/ggml/src/ggml-backend-impl.h
+++ b/ggml/src/ggml-backend-impl.h
@@ -1,5 +1,25 @@
 #pragma once
 
+#include "ggml-impl.h"
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#include <malloc.h> // using malloc.h with MSC/MINGW
+#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
+#include <alloca.h>
+#endif
+
+#include <errno.h>
+
+#ifdef GGML_USE_CPU_HBM
+#include <hbwmalloc.h>
+#endif
+
+#if defined(__APPLE__)
+#include <unistd.h>
+#include <mach/mach.h>
+#include <TargetConditionals.h>
+#endif
+
 // ggml-backend internal header
 
 #include "ggml-backend.h"
@@ -222,6 +242,75 @@ extern "C" {
     // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
     // typedef ggml_backend_register_t * (*ggml_backend_init)(void);
 
+
+    //
+    // Memory allocation
+    //
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#define GGML_ALIGNED_MALLOC(size)       _aligned_malloc(size, GGML_MEM_ALIGN)
+#define GGML_ALIGNED_FREE(ptr, size)    _aligned_free(ptr)
+#else
+inline static void * ggml_aligned_malloc(size_t size) {
+    if (size == 0) {
+        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
+        return NULL;
+    }
+    void * aligned_memory = NULL;
+#ifdef GGML_USE_CPU_HBM
+    int result = hbw_posix_memalign(&aligned_memory, 16, size);
+#elif TARGET_OS_OSX
+    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
+    int result = EFAULT;
+    switch (alloc_status) {
+        case KERN_SUCCESS:
+            result = 0;
+            break;
+        
+        case KERN_INVALID_ADDRESS:
+            result = EINVAL;
+            break;
+
+        case KERN_NO_SPACE:
+            result = ENOMEM;
+            break;
+
+        default:
+            result = EFAULT;
+            break;
+    }
+#elif GGML_USE_METAL
+    int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
+#else
+    int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
+#endif
+    if (result != 0) {
+        // Handle allocation failure
+        const char *error_desc = "unknown allocation error";
+        switch (result) {
+            case EINVAL:
+                error_desc = "invalid alignment value";
+                break;
+            case ENOMEM:
+                error_desc = "insufficient memory";
+                break;
+        }
+        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
+        GGML_ABORT("fatal error");
+        return NULL;
+    }
+    return aligned_memory;
+}
+#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
+#ifdef GGML_USE_CPU_HBM
+#define GGML_ALIGNED_FREE(ptr, size)    if(NULL != ptr) hbw_free(ptr)
+#elif TARGET_OS_OSX
+#define GGML_ALIGNED_FREE(ptr, size)    if(NULL != ptr) vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size)
+#else
+#define GGML_ALIGNED_FREE(ptr, size)    free(ptr)
+#endif
+#endif
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index 263b3e75d..ad2512c53 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -24,8 +24,6 @@
 #ifdef __APPLE__
 #include <sys/types.h>
 #include <sys/sysctl.h>
-#include <unistd.h>
-#include <TargetConditionals.h>
 #endif
 
 
@@ -704,7 +702,7 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
 }
 
 static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
-    free(buffer->context);
+    GGML_ALIGNED_FREE(buffer->context, buffer->size);
 }
 
 static void ggml_backend_cpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
@@ -772,21 +770,12 @@ static const char * ggml_backend_cpu_buffer_type_get_name(ggml_backend_buffer_ty
 }
 
 static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-#if defined(GGML_USE_METAL) || defined(TARGET_OS_OSX)
-    void * data = NULL;
-    int result = posix_memalign(&data, sysconf(_SC_PAGESIZE), size);
-    if (result != 0) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer using posix_memalign with size %zu\n", __func__, size);
-        return NULL;
-    }
-#else
-    size += TENSOR_ALIGNMENT;   // malloc may return an address that is not aligned
-    void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h)
+    void * data = GGML_ALIGNED_MALLOC(size);
+
     if (data == NULL) {
         GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
         return NULL;
     }
-#endif
 
     return ggml_backend_buffer_init(buft, ggml_backend_cpu_buffer_i, data, size);
 }
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 3f01092d9..48437b1e0 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3,17 +3,12 @@
 
 #include "ggml-backend.h"
 #include "ggml-impl.h"
+#include "ggml-backend-impl.h"
 #include "ggml-cpu-impl.h"
 #include "ggml-quants.h"
 #include "ggml.h"
 #include "ggml-aarch64.h"
 
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#include <malloc.h> // using malloc.h with MSC/MINGW
-#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
-#include <alloca.h>
-#endif
-
 #include <assert.h>
 #include <errno.h>
 #include <time.h>
@@ -35,10 +30,6 @@
 #include <omp.h>
 #endif
 
-#ifdef GGML_USE_METAL
-#include <unistd.h>
-#endif
-
 #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
 #undef GGML_USE_LLAMAFILE
 #endif
@@ -184,10 +175,6 @@ typedef void * thread_ret_t;
 
 typedef pthread_t ggml_thread_t;
 
-#ifdef GGML_USE_CPU_HBM
-#include <hbwmalloc.h>
-#endif
-
 #if defined(__APPLE__)
 #include <TargetConditionals.h>
 #endif
@@ -386,47 +373,6 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
 //#define GGML_SOFT_MAX_ACCELERATE
 #endif
 
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
-#define GGML_ALIGNED_FREE(ptr)    _aligned_free(ptr)
-#else
-inline static void * ggml_aligned_malloc(size_t size) {
-    if (size == 0) {
-        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
-        return NULL;
-    }
-    void * aligned_memory = NULL;
-#ifdef GGML_USE_CPU_HBM
-    int result = hbw_posix_memalign(&aligned_memory, 16, size);
-#elif GGML_USE_METAL
-    int result = posix_memalign(&aligned_memory, sysconf(_SC_PAGESIZE), size);
-#else
-    int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
-#endif
-    if (result != 0) {
-        // Handle allocation failure
-        const char *error_desc = "unknown allocation error";
-        switch (result) {
-            case EINVAL:
-                error_desc = "invalid alignment value";
-                break;
-            case ENOMEM:
-                error_desc = "insufficient memory";
-                break;
-        }
-        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
-        GGML_ABORT("fatal error");
-        return NULL;
-    }
-    return aligned_memory;
-}
-#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
-#ifdef GGML_USE_CPU_HBM
-#define GGML_ALIGNED_FREE(ptr)    if(NULL != ptr) hbw_free(ptr)
-#else
-#define GGML_ALIGNED_FREE(ptr)    free(ptr)
-#endif
-#endif
 
 inline static void * ggml_malloc(size_t size) {
     if (size == 0) {
@@ -3909,7 +3855,7 @@ void ggml_free(struct ggml_context * ctx) {
                     __func__, i, ggml_used_mem(ctx));
 
             if (ctx->mem_buffer_owned) {
-                GGML_ALIGNED_FREE(ctx->mem_buffer);
+                GGML_ALIGNED_FREE(ctx->mem_buffer, ctx->mem_size);
             }
 
             found = true;
@@ -19630,8 +19576,9 @@ void ggml_threadpool_free(struct ggml_threadpool* threadpool) {
     ggml_cond_destroy(&threadpool->cond);
 #endif // GGML_USE_OPENMP
 
-    GGML_ALIGNED_FREE(threadpool->workers);
-    GGML_ALIGNED_FREE(threadpool);
+    const size_t workers_size = sizeof(struct ggml_compute_state) * n_threads;
+    GGML_ALIGNED_FREE(threadpool->workers, workers_size);
+    GGML_ALIGNED_FREE(threadpool, sizeof(struct ggml_threadpool));
 }
 
 #ifndef GGML_USE_OPENMP