ggml : add is_ram_shared to ggml_backend

Metal can share the host RAM and can utilize mmap without a temporary buffer
Georgi Gerganov 2023-07-18 18:51:02 +03:00
parent 90503f150d
commit 652c849643
4 changed files with 68 additions and 21 deletions


@@ -61,7 +61,10 @@ extern "C" {
    struct ggml_backend {
        struct ggml_backend_interface * interface;
        ggml_backend_context_t context;
        bool is_ram_shared;
    };
    // backend helper functions
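
Together with the commit message, is_ram_shared presumably indicates that the backend (e.g. Metal with unified memory) can read host pages directly, so a loader can mmap the model file instead of staging it through a temporary buffer. Below is a minimal sketch of that decision, using plain POSIX I/O and a hypothetical load_weights helper; none of it is part of this commit, and the ggml-backend.h header name is assumed.

#include <fcntl.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include "ggml-backend.h" // assumed header providing struct ggml_backend on this branch

// Hypothetical loader: if the backend shares RAM with the host, map the file
// directly and hand the pages to the backend; otherwise stage through malloc.
static void * load_weights(struct ggml_backend * backend, const char * path, size_t * size) {
    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        return NULL;
    }

    struct stat st;
    fstat(fd, &st);
    *size = (size_t) st.st_size;

    void * data = NULL;
    if (backend->is_ram_shared) {
        // no temporary buffer: the backend can use these pages as-is
        data = mmap(NULL, *size, PROT_READ, MAP_SHARED, fd, 0);
    } else {
        // discrete-memory backend: read into host memory, copy to the device later
        data = malloc(*size);
        read(fd, data, *size);
    }

    close(fd);
    return data;
}
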
@@ -78,7 +81,16 @@ extern "C" {
    static inline void ggml_backend_graph_compute(struct ggml_backend * backend, struct ggml_cgraph * cgraph) { backend->interface->graph_compute(backend->context, cgraph); }

    // buffer and tensor allocation
    GGML_API struct ggml_buffer ggml_backend_alloc_buffer(struct ggml_backend * backend, size_t size, size_t max_tensors); // GG: probably return ptr
    // TODO:
    //   - return "struct ggml_buffer *"
    //   - fix namings:
    //     - ggml_backend_alloc_buffer -> ggml_backend_buffer_alloc
    //     - ggml_backend_free_buffer  -> ggml_backend_buffer_free
    //     - ggml_backend_reset_buffer -> ggml_backend_buffer_reset
    //     - ggml_backend_alloc_tensor -> ggml_backend_tensor_alloc
    //     - ggml_backend_tensor_cpy   -> ggml_backend_tensor_copy
    //
    GGML_API struct ggml_buffer ggml_backend_alloc_buffer(struct ggml_backend * backend, size_t size, size_t max_tensors);
    GGML_API void ggml_backend_free_buffer(struct ggml_buffer * buffer);
    static inline void ggml_backend_reset_buffer(struct ggml_buffer * buffer) { buffer->backend->interface->reset_buffer(buffer->backend->context, buffer->backend_buffer); }
    static inline void ggml_backend_alloc_tensor(struct ggml_buffer * buffer, struct ggml_tensor * tensor) { buffer->backend->interface->alloc_tensor(buffer->backend->context, buffer->backend_buffer, tensor); }
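
The buffer API above suggests a two-step flow: reserve backend memory with ggml_backend_alloc_buffer (which, per the TODO, still returns struct ggml_buffer by value rather than a pointer), then place individual tensors into it with ggml_backend_alloc_tensor. A hedged usage sketch under those assumptions follows; the header name, sizes, and ggml_new_tensor_1d-based tensor creation are illustrative and not taken from this commit.

#include "ggml.h"
#include "ggml-backend.h" // assumed header for the declarations shown above

// Illustrative flow only: reserve a backend buffer, bind two tensors to it,
// then reset/free. Sizes and shapes are arbitrary; ctx is assumed to be a
// ggml_context created with no_alloc so data placement is left to the backend.
static void buffer_example(struct ggml_backend * backend, struct ggml_context * ctx) {
    // room for up to 2 tensors in one backend allocation
    struct ggml_buffer buf = ggml_backend_alloc_buffer(backend, 2*1024*1024, 2);

    // tensor metadata lives in the ggml context; data is placed in the backend buffer
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    ggml_backend_alloc_tensor(&buf, a);
    ggml_backend_alloc_tensor(&buf, b);

    // ... build a graph and run it with ggml_backend_graph_compute(backend, &gf) ...

    ggml_backend_reset_buffer(&buf); // make the buffer's memory reusable
    ggml_backend_free_buffer(&buf);  // release the backend allocation
}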