fix: use vm_allocate to allocate CPU backend buffer on macOS (#9875)

* fix: use `vm_allocate` to allocate CPU backend buffer on macOS * fix: switch to `posix_memalign` to keep existing `free()` usages work * feat: move `GGML_ALIGNED_MALLOC` to `ggml-backend-impl.h`, add support for `vm_allocate` on macOS * style: formatting * fix: move const outside of `#ifndef` * style: formatting * fix: unused var * fix: transform `GGML_ALIGNED_MALLOC` and `GGML_ALIGNED_FREE` into functions and add them to `ggml-impl.h` * fix: unused var * fix: page align to `GGUF_DEFAULT_ALIGNMENT` * fix: page align to `TENSOR_ALIGNMENT` * fix: convert `TENSOR_ALIGNMENT` to a macro * fix: increase page size to `32` on iOS * fix: iOS page size * fix: `hbw_posix_memalign` alignment
2024-10-17 01:36:51 +03:00 · 2024-10-17 01:36:51 +03:00 · 73afe681aa
commit 73afe681aa
parent 9e04102448
3 changed files with 63 additions and 27 deletions
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@ -19,6 +19,9 @@ extern "C" {
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))

+// required for mmap as gguf only guarantees 32-byte alignment
+#define TENSOR_ALIGNMENT 32
+
 // static_assert should be a #define, but if it's not,
 // fall back to the _Static_assert C11 keyword.
 // if C99 - static_assert is noop
@ -196,6 +199,11 @@ struct ggml_cgraph {

 struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);

+// Memory allocation
+
+void * ggml_aligned_malloc(size_t size);
+void ggml_aligned_free(void * ptr, size_t size);
+
 #ifdef __cplusplus
 }
 #endif