llama : offload to RPC in addition to other backends (#7640)

* llama : offload to RPC in addition to other backends * - fix copy_tensor being called on the src buffer instead of the dst buffer - always initialize views in the view_src buffer - add RPC backend to Makefile build - add endpoint to all RPC object names * add rpc-server to Makefile * Update llama.cpp Co-authored-by: slaren <slarengh@gmail.com> --------- Co-authored-by: slaren <slarengh@gmail.com>
2024-06-03 20:03:26 +03:00 · 2024-06-03 20:03:26 +03:00 · bde7cd3cd9
commit bde7cd3cd9
parent a5735e4426
6 changed files with 86 additions and 53 deletions
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
                // this tensor was allocated without ggml-backend
                return;
            }
-            ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
+            ggml_backend_view_init(tensor);
        }
    } else {
        if (tensor->data == NULL) {
@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
            if (t->view_src == NULL) {
                ggml_tallocr_alloc(&tallocr, t);
            } else if (t->buffer == NULL) {
-                ggml_backend_view_init(buffer, t);
+                ggml_backend_view_init(t);
            }
        } else {
            if (t->view_src != NULL && t->buffer == NULL) {
                // view of a pre-allocated tensor
-                ggml_backend_view_init(buffer, t);
+                ggml_backend_view_init(t);
            }
        }
    }