metal : handle buffers larger than device's maxBufferLength (#1826)

* metal : handle buffers larger than device's maxBufferLength * metal : print more verbose device info + handle errors * metal : fix prints for overlapping views * metal : minimize view overlap to try to utilize device memory better
2023-06-18 09:09:47 +03:00 · 2023-06-18 09:09:47 +03:00 · ce2c7d72e2
commit ce2c7d72e2
parent 57cd69460f
6 changed files with 125 additions and 35 deletions
--- a/ggml.c
+++ b/ggml.c
@ -4154,14 +4154,34 @@ void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
    ctx->no_alloc = no_alloc;
 }

-void * ggml_get_mem_buffer(struct ggml_context * ctx) {
+void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
    return ctx->mem_buffer;
 }

-size_t ggml_get_mem_size(struct ggml_context * ctx) {
+size_t ggml_get_mem_size(const struct ggml_context * ctx) {
    return ctx->mem_size;
 }

+size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
+    size_t max_size = 0;
+
+    struct ggml_object * obj = ctx->objects_begin;
+
+    while (obj != NULL) {
+        struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
+
+        const size_t size = ggml_nbytes(tensor);
+
+        if (max_size < size) {
+            max_size = size;
+        }
+
+        obj = obj->next;
+    }
+
+    return max_size;
+}
+
 // IMPORTANT:
 // when creating "opt" tensors, always save and load the scratch buffer
 // this is an error prone process, but it is necessary to support inplace