llama : offload to RPC in addition to other backends (#7640)
* llama : offload to RPC in addition to other backends

* - fix copy_tensor being called on the src buffer instead of the dst buffer
  - always initialize views in the view_src buffer
  - add RPC backend to Makefile build
  - add endpoint to all RPC object names

* add rpc-server to Makefile

* Update llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
parent a5735e4426
commit bde7cd3cd9

6 changed files with 86 additions and 53 deletions
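To make the first fix in the message concrete: when a tensor is copied across backends, the copy callback has to come from the destination buffer, since that is the backend that knows how to receive data into its memory. Below is a simplified, hedged sketch of that dispatch rule; the iface member and its cpy_tensor signature mirror ggml-backend's internal interface (ggml-backend-impl.h), but the helper name and the fallback path are illustrative, not the exact upstream code.

// copy_tensor_sketch: illustrative only, not the upstream implementation.
// Dispatch the copy through the *destination* buffer's cpy_tensor callback
// (the bug was using the source buffer), with a generic get/set fallback.
#include <cstdint>
#include <vector>
#include "ggml-backend-impl.h" // internal header: exposes buffer->iface

static void copy_tensor_sketch(const struct ggml_tensor * src, struct ggml_tensor * dst) {
    ggml_backend_buffer_t dst_buf = dst->buffer; // destination owns the copy
    if (dst_buf->iface.cpy_tensor && dst_buf->iface.cpy_tensor(dst_buf, src, dst)) {
        return; // the destination backend handled the copy directly
    }
    // fallback: stage the data through host memory
    std::vector<uint8_t> tmp(ggml_nbytes(src));
    ggml_backend_tensor_get(src, tmp.data(), 0, tmp.size());
    ggml_backend_tensor_set(dst, tmp.data(), 0, tmp.size());
}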
@@ -491,7 +491,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer
     if (remote_ptr != 0) {
         ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
             ggml_backend_rpc_buffer_interface,
-            new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC"},
+            new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC[" + std::string(buft_ctx->endpoint) + "]"},
             remote_size);
         return buffer;
     } else {
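With this hunk, every remote buffer's debug name embeds the endpoint of the rpc-server that owns it, so logs can tell multiple RPC servers apart. A minimal usage sketch, assuming the public ggml-rpc/ggml-backend API and a hypothetical server listening at 192.168.1.10:50052:

#include <cstdio>
#include "ggml-rpc.h"

int main() {
    // hypothetical endpoint: requires a running rpc-server at this address
    ggml_backend_buffer_type_t buft = ggml_backend_rpc_buffer_type("192.168.1.10:50052");
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 1024 * 1024);
    if (buf != nullptr) {
        // after this change: "RPC[192.168.1.10:50052]" instead of just "RPC"
        printf("buffer name: %s\n", ggml_backend_buffer_name(buf));
        ggml_backend_buffer_free(buf);
    }
    return 0;
}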
@@ -692,7 +692,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const
 GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) {
     ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
         /* .endpoint = */ endpoint,
-        /* .name     = */ "RPC",
+        /* .name     = */ "RPC[" + std::string(endpoint) + "]",
     };

     ggml_backend_t backend = new ggml_backend {
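And with the second hunk, the backend object created by ggml_backend_rpc_init carries the same endpoint-qualified name. A hedged companion sketch, using the same hypothetical endpoint as above:

#include <cstdio>
#include "ggml-rpc.h"

int main() {
    // hypothetical endpoint; treat a null result as a failed connection
    ggml_backend_t backend = ggml_backend_rpc_init("192.168.1.10:50052");
    if (backend != nullptr) {
        // after this change: prints "RPC[192.168.1.10:50052]"
        printf("backend name: %s\n", ggml_backend_name(backend));
        ggml_backend_free(backend);
    }
    return 0;
}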