llama : offload to RPC in addition to other backends (#7640)

* llama : offload to RPC in addition to other backends

* fix copy_tensor being called on the src buffer instead of the dst buffer

* always initialize views in the view_src buffer

* add RPC backend to Makefile build

* add endpoint to all RPC object names

* add rpc-server to Makefile

* Update llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
Radoslav Gerganov authored on 2024-06-03 20:03:26 +03:00; committed by GitHub
parent a5735e4426
commit bde7cd3cd9
6 changed files with 86 additions and 53 deletions


@@ -491,7 +491,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer
     if (remote_ptr != 0) {
         ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
             ggml_backend_rpc_buffer_interface,
-            new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC"},
+            new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC[" + std::string(buft_ctx->endpoint) + "]"},
             remote_size);
         return buffer;
     } else {
@@ -692,7 +692,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const
 GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) {
     ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
         /* .endpoint = */ endpoint,
-        /* .name     = */ "RPC",
+        /* .name     = */ "RPC[" + std::string(endpoint) + "]",
     };
     ggml_backend_t backend = new ggml_backend {
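
For context, a minimal sketch (not part of this commit) of how the endpoint-tagged names surface to callers. It assumes ggml was built with the RPC backend and that an rpc-server is reachable at localhost:50052, which is a placeholder endpoint:

// sketch: connect to an RPC server and inspect the backend name
// assumes an RPC-enabled build and an rpc-server on localhost:50052
#include <cstdio>
#include "ggml-rpc.h"

int main() {
    ggml_backend_t backend = ggml_backend_rpc_init("localhost:50052");
    if (backend == NULL) {
        fprintf(stderr, "failed to connect to the RPC server\n");
        return 1;
    }
    // after this change the name embeds the endpoint, e.g. "RPC[localhost:50052]",
    // so several RPC backends in use at once can be told apart in logs
    printf("backend name: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}

The same endpoint-tagged names show up when llama.cpp itself offloads to one or more RPC servers (e.g. via the --rpc flag) alongside other backends, which is what the per-endpoint naming is for.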