Apply suggestions from code review

Co-authored-by: Diego Devesa <slarengh@gmail.com>
matt23654 2025-01-03 22:46:35 +00:00 committed by GitHub
parent c111e8a5b2
commit 4973a298b6


@@ -479,7 +479,7 @@ static void ggml_backend_rpc_buffer_init_tensor(ggml_backend_buffer_t buffer, gg
     // CUDA backend on the server pads everything to 512 due to CUDA limitations.
     // Due to bandwidth constraints, we only call the server init tensor functions if necessary.
-    // In particular, this is tensors with padding that needs to be cleared, so base tensors only and only misaligned.
+    // In particular, only quantized tensors need padding
     if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) {
         rpc_msg_init_tensor_req request;
@@ -600,7 +600,7 @@ static size_t ggml_backend_rpc_get_max_size(ggml_backend_buffer_type_t buft) {
 static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
     // See comments in init_tensor.
-    if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0)) {
+    if (ggml_is_quantized(tensor->type) && (tensor->ne[0] % 512 != 0) && (tensor->view_src == nullptr)) {
         ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context;
         auto sock = get_socket(buft_ctx->endpoint);
@@ -996,7 +996,7 @@ bool rpc_server::init_tensor(const rpc_msg_init_tensor_req & request) {
         GGML_LOG_ERROR("Null buffer for tensor passed to init_tensor function\n");
     }
-    if(tensor->extra != nullptr) {
+    if (tensor->extra != nullptr) {
         // This pointer can either be passed around client/server, or probably better stored server-side and kept track of.
         // Currently unimplemented.
         GGML_LOG_ERROR("tensor->extra populated by the backend, this is currently unsupported.\n");