metal : use shared buffers between CPU and GPU (#1696)

* Use MTLDevice.newBufferWithBytesNoCopy to share buffers between CPU and GPU

* Page-align buffers used by Metal

* Remove trailing whitespace

* Only import unistd.h for Metal builds

* metal : remove unnecessary copies

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
kiltyj 2023-06-05 13:24:04 -07:00 committed by GitHub
parent efe0507632
commit 9d0693bce3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 38 additions and 16 deletions

8
ggml.c
View file

@@ -22,6 +22,10 @@
#include <float.h>
#include <limits.h>
#ifdef GGML_USE_METAL
#include <unistd.h>
#endif
// if C99 - static_assert is noop
// ref: https://stackoverflow.com/a/53923785/4039976
#ifndef static_assert
@@ -122,7 +126,11 @@ typedef void* thread_ret_t;
#else
inline static void* ggml_aligned_malloc(size_t size) {
void* aligned_memory = NULL;
#ifdef GGML_USE_METAL
int result = posix_memalign(&aligned_memory, getpagesize(), size);
#else
int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size);
#endif
if (result != 0) {
// Handle allocation failure
return NULL;