llama : offload to RPC in addition to other backends (#7640)

* llama : offload to RPC in addition to other backends

* - fix copy_tensor being called on the src buffer instead of the dst buffer

- always initialize views in the view_src buffer

- add RPC backend to Makefile build

- add endpoint to all RPC object names

* add rpc-server to Makefile

* Update llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
Radoslav Gerganov 2024-06-03 20:03:26 +03:00 committed by GitHub
parent a5735e4426
commit bde7cd3cd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 86 additions and 53 deletions

View file

@ -151,7 +151,7 @@ void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) {
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) {
ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer;
if (dst_buf->iface.cpy_tensor) {
return src->buffer->iface.cpy_tensor(dst_buf, src, dst);
return dst_buf->iface.cpy_tensor(dst_buf, src, dst);
}
return false;
}
@ -1887,15 +1887,15 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,
// utils
void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
void ggml_backend_view_init(struct ggml_tensor * tensor) {
GGML_ASSERT(tensor->buffer == NULL);
GGML_ASSERT(tensor->view_src != NULL);
GGML_ASSERT(tensor->view_src->buffer != NULL);
GGML_ASSERT(tensor->view_src->data != NULL);
tensor->buffer = buffer;
tensor->buffer = tensor->view_src->buffer;
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
ggml_backend_buffer_init_tensor(buffer, tensor);
ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
}
void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
@ -1954,7 +1954,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
struct ggml_tensor * dst = node_copies[id];
if (dst->view_src != NULL) {
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
ggml_backend_view_init(dst->view_src->buffer, dst);
ggml_backend_view_init(dst);
}
else {
ggml_backend_tensor_copy(src, dst);