llama : offload to RPC in addition to other backends (#7640)

* llama : offload to RPC in addition to other backends

* - fix copy_tensor being called on the src buffer instead of the dst buffer

- always initialize views in the view_src buffer

- add RPC backend to Makefile build

- add endpoint to all RPC object names

* add rpc-server to Makefile

* Update llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
Radoslav Gerganov 2024-06-03 20:03:26 +03:00 committed by GitHub
parent a5735e4426
commit bde7cd3cd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 86 additions and 53 deletions

View file

@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
// this tensor was allocated without ggml-backend
return;
}
ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
ggml_backend_view_init(tensor);
}
} else {
if (tensor->data == NULL) {
@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
if (t->view_src == NULL) {
ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
ggml_backend_view_init(buffer, t);
ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
ggml_backend_view_init(buffer, t);
ggml_backend_view_init(t);
}
}
}