From 4973a298b659d16aa27c1b89f59037c0783c19e3 Mon Sep 17 00:00:00 2001 From: matt23654 Date: Fri, 3 Jan 2025 22:46:35 +0000 Subject: [PATCH] Apply suggestions from code review Co-authored-by: Diego Devesa --- ggml/src/ggml-rpc/ggml-rpc.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 030f02791..2213aba9f 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -479,7 +479,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg // CUDA backend on the server pads everything to 512 due to CUDA limitations. // Due to bandwidth constraints, we only call the server init tensor functions if necessary. - // In particular, this is tensors with padding that needs to be cleared, so base tensors only and only misaligned. + // In particular, only quantized tensors need padding if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) { rpc_msg_init_tensor_req request; @@ -600,7 +600,7 @@ static size_t ggml_backend_rpc_get_max_size(ggml_backend_buffer_type_t buft) { static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { // See comments in init_tensor. - if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0)) { + if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) { ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context; auto sock = get_socket(buft_ctx->endpoint); @@ -996,7 +996,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) { GGML_LOG_ERROR("Null buffer for tensor passed to init_tensor function\n"); } - if(tensor->extra != nullptr) { + if (tensor->extra != nullptr) { // This pointer can either be passed around client/server, or probably better stored server-side and kept track of. // Currently unimplemented. GGML_LOG_ERROR("tensor->extra populated by the backend, this is currently unsupported.\n");